default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
EXTERN	OPENSSL_ia32cap_P
global	ossl_vaes_vpclmulqdq_capable

;-----------------------------------------------------------------------
; ossl_vaes_vpclmulqdq_capable
;
; Tests whether every bit of a fixed feature mask is set in the 64-bit
; word at OPENSSL_ia32cap_P+8.
;
; In:    none
; Out:   rax = the mask itself (non-zero) when all mask bits are set,
;              0 otherwise
; Clobb: rcx, rdx, flags
;
; Mask bits, numbered within the 64-bit word that is loaded:
;   16, 17, 30, 31, 41, 42
; NOTE(review): per the OPENSSL_ia32cap documentation these should be
; AVX512F, AVX512DQ, AVX512BW, AVX512VL, VAES and VPCLMULQDQ -- confirm
; against the capability-vector layout of the OpenSSL version in use.
;-----------------------------------------------------------------------
ALIGN	32
ossl_vaes_vpclmulqdq_capable:
	xor	eax,eax				; result defaults to 0 (not capable)
	mov	rcx,QWORD[((OPENSSL_ia32cap_P+8))]	; capability word under test

	; Required-feature mask; evaluates to 6600291188736 (0x600C0030000).
	mov	rdx,((1<<16)|(1<<17)|(1<<30)|(1<<31)|(1<<41)|(1<<42))
	and	rcx,rdx				; keep only the bits we care about
	cmp	rcx,rdx				; equal only if none of them is missing
	cmove	rax,rcx				; all present -> return the mask
	DB	0F3h,0C3h		;repret

section	.text code align=64

global	ossl_aes_gcm_init_avx512

;-----------------------------------------------------------------------
; ossl_aes_gcm_init_avx512
;
; 1. AES-encrypts an all-zero 16-byte block with the round keys found
;    at [rcx] (round-count field read from [240+rcx]).
; 2. Post-processes the result: byte shuffle through SHUF_MASK, 128-bit
;    shift left by one bit, and a conditional XOR with POLY when a bit
;    was shifted out of the top.  Call the outcome H.
; 3. Builds 16 table entries by repeated carry-less multiply + reduce
;    (reduction constant POLY2) and stores them at [96+rdx]..[351+rdx].
;    Writing H^n for the n-th entry produced by that multiply/reduce
;    sequence, H^n lands at offset 336 - 16*(n-1):
;        336: H^1   320: H^2   304: H^3   288: H^4
;        272: H^5   ...                   224: H^8
;        208: H^9   ...                   160: H^12
;        144: H^13  ...                    96: H^16
;
; In:    rcx = base of the round keys (16 bytes each, from offset 0),
;              with a DWORD round-count field at offset 240
;        rdx = base of the structure that receives the table
;        (presumably the AES key schedule and the GCM context, passed
;         in the Win64 argument registers -- confirm with the C
;         prototype)
; Out:   no deliberate return value (eax is left holding the value read
;        from [240+rcx])
; Clobb: eax, flags, zmm0-zmm5, zmm16
; Stack: not used
;
; SHUF_MASK, TWOONE, POLY and POLY2 are data labels defined outside the
; visible part of the file; their contents are not shown here.
;
; NOTE(review): a round-count value other than 9, 11 or 13 skips the
; encryption entirely, so the table is derived from the all-zero block
; and no error is signalled.
;-----------------------------------------------------------------------
ALIGN	32
ossl_aes_gcm_init_avx512:

DB	243,15,30,250			; bytes F3 0F 1E FA = endbr64
	vpxorq	xmm16,xmm16,xmm16	; xmm16 = all-zero block to encrypt


	mov	eax,DWORD[240+rcx]	; eax = round-count field
	cmp	eax,9
	je	NEAR $L$aes_128_0	; 9  -> 9 vaesenc + vaesenclast
	cmp	eax,11
	je	NEAR $L$aes_192_0	; 11 -> 11 vaesenc + vaesenclast
	cmp	eax,13
	je	NEAR $L$aes_256_0	; 13 -> 13 vaesenc + vaesenclast
	jmp	NEAR $L$exit_aes_0	; anything else: xmm16 stays zero
ALIGN	32
; Round keys 0..10: initial XOR, 9 middle rounds, final round at offset 160.
$L$aes_128_0:
	vpxorq	xmm16,xmm16,XMMWORD[rcx]

	vaesenc	xmm16,xmm16,XMMWORD[16+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[32+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[48+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[64+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[80+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[96+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[112+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[128+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[144+rcx]

	vaesenclast	xmm16,xmm16,XMMWORD[160+rcx]
	jmp	NEAR $L$exit_aes_0
ALIGN	32
; Round keys 0..12: initial XOR, 11 middle rounds, final round at offset 192.
$L$aes_192_0:
	vpxorq	xmm16,xmm16,XMMWORD[rcx]

	vaesenc	xmm16,xmm16,XMMWORD[16+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[32+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[48+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[64+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[80+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[96+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[112+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[128+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[144+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[160+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[176+rcx]

	vaesenclast	xmm16,xmm16,XMMWORD[192+rcx]
	jmp	NEAR $L$exit_aes_0
ALIGN	32
; Round keys 0..14: initial XOR, 13 middle rounds, final round at offset 224.
$L$aes_256_0:
	vpxorq	xmm16,xmm16,XMMWORD[rcx]

	vaesenc	xmm16,xmm16,XMMWORD[16+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[32+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[48+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[64+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[80+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[96+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[112+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[128+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[144+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[160+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[176+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[192+rcx]

	vaesenc	xmm16,xmm16,XMMWORD[208+rcx]

	vaesenclast	xmm16,xmm16,XMMWORD[224+rcx]
	jmp	NEAR $L$exit_aes_0	; target is the very next instruction
$L$exit_aes_0:

	; Reorder the bytes of the encrypted block (mask contents are
	; outside this view; presumably a byte reversal -- confirm).
	vpshufb	xmm16,xmm16,XMMWORD[SHUF_MASK]

	; 128-bit shift left by one bit, built from two 64-bit shifts.
	vmovdqa64	xmm2,xmm16
	vpsllq	xmm16,xmm16,1		; each qword << 1
	vpsrlq	xmm2,xmm2,63		; xmm2 = bit 63 of each qword (the carries)
	vmovdqa	xmm1,xmm2
	vpslldq	xmm2,xmm2,8		; low-qword carry -> bit 0 of high qword
	vpsrldq	xmm1,xmm1,8		; xmm1 = carry out of the high qword (old bit 127)
	vporq	xmm16,xmm16,xmm2	; xmm16 = value << 1 across 128 bits

	; If a bit was shifted out, XOR POLY into the result.
	vpshufd	xmm2,xmm1,36		; dwords {0,1,2,0}: xmm2 = {c,0,0,c}, c = carry
	vpcmpeqd	xmm2,xmm2,XMMWORD[TWOONE]	; per-dword all-ones where equal
	vpand	xmm2,xmm2,XMMWORD[POLY]	; keep POLY only in matching dwords
	vpxorq	xmm16,xmm16,xmm2

	vmovdqu64	XMMWORD[336+rdx],xmm16	; store H^1
	vshufi32x4	ymm4,ymm16,ymm16,0x00	; ymm4 = H^1 in both 128-bit lanes
	vmovdqa	ymm3,ymm4

	; ---- H^2 = H^1 * H^1 (both lanes) ----
	; Four partial products per lane: hi*hi, lo*lo and two cross terms.
	vpclmulqdq	ymm0,ymm3,ymm4,0x11
	vpclmulqdq	ymm1,ymm3,ymm4,0x00
	vpclmulqdq	ymm2,ymm3,ymm4,0x01
	vpclmulqdq	ymm3,ymm3,ymm4,0x10
	vpxorq	ymm3,ymm3,ymm2		; sum of the cross terms

	; Split the cross-term sum across the high (ymm0) and low (ymm3)
	; halves of the 256-bit product.
	vpsrldq	ymm2,ymm3,8
	vpslldq	ymm3,ymm3,8
	vpxorq	ymm0,ymm0,ymm2
	vpxorq	ymm3,ymm3,ymm1



	; Reduce the product back to 128 bits per lane using POLY2.
	vmovdqu64	ymm2,YMMWORD[POLY2]

	vpclmulqdq	ymm1,ymm2,ymm3,0x01
	vpslldq	ymm1,ymm1,8
	vpxorq	ymm3,ymm3,ymm1



	vpclmulqdq	ymm1,ymm2,ymm3,0x00
	vpsrldq	ymm1,ymm1,4
	vpclmulqdq	ymm3,ymm2,ymm3,0x10
	vpslldq	ymm3,ymm3,4

	vpternlogq	ymm3,ymm0,ymm1,0x96	; 0x96 = three-way XOR: ymm3 ^= ymm0 ^ ymm1

	vmovdqu64	XMMWORD[320+rdx],xmm3	; store H^2
	vinserti64x2	ymm4,ymm3,xmm16,1	; ymm4 = {H^2 (low lane), H^1 (high lane)}
	vmovdqa64	ymm5,ymm4		; keep {H^2,H^1} for the zmm merge below

	; ---- {H^4,H^3} = {H^2,H^1} * H^2 ----
	vpclmulqdq	ymm0,ymm4,ymm3,0x11
	vpclmulqdq	ymm1,ymm4,ymm3,0x00
	vpclmulqdq	ymm2,ymm4,ymm3,0x01
	vpclmulqdq	ymm4,ymm4,ymm3,0x10
	vpxorq	ymm4,ymm4,ymm2

	vpsrldq	ymm2,ymm4,8
	vpslldq	ymm4,ymm4,8
	vpxorq	ymm0,ymm0,ymm2
	vpxorq	ymm4,ymm4,ymm1



	; Reduction with POLY2, same sequence as above.
	vmovdqu64	ymm2,YMMWORD[POLY2]

	vpclmulqdq	ymm1,ymm2,ymm4,0x01
	vpslldq	ymm1,ymm1,8
	vpxorq	ymm4,ymm4,ymm1



	vpclmulqdq	ymm1,ymm2,ymm4,0x00
	vpsrldq	ymm1,ymm1,4
	vpclmulqdq	ymm4,ymm2,ymm4,0x10
	vpslldq	ymm4,ymm4,4

	vpternlogq	ymm4,ymm0,ymm1,0x96

	vmovdqu64	YMMWORD[288+rdx],ymm4	; store H^4 (288) and H^3 (304)

	vinserti64x4	zmm4,zmm4,ymm5,1	; zmm4 = {H^4,H^3,H^2,H^1}, lane 0 first


	vshufi64x2	zmm3,zmm4,zmm4,0x00	; zmm3 = H^4 in all four lanes
	vmovdqa64	zmm5,zmm4		; zmm5 = {H^4..H^1}, reused for H^12..H^9

	; ---- {H^8..H^5} = {H^4..H^1} * H^4 ----
	vpclmulqdq	zmm0,zmm4,zmm3,0x11
	vpclmulqdq	zmm1,zmm4,zmm3,0x00
	vpclmulqdq	zmm2,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm2

	vpsrldq	zmm2,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm0,zmm0,zmm2
	vpxorq	zmm4,zmm4,zmm1



	; Reduction with POLY2.
	vmovdqu64	zmm2,ZMMWORD[POLY2]

	vpclmulqdq	zmm1,zmm2,zmm4,0x01
	vpslldq	zmm1,zmm1,8
	vpxorq	zmm4,zmm4,zmm1



	vpclmulqdq	zmm1,zmm2,zmm4,0x00
	vpsrldq	zmm1,zmm1,4
	vpclmulqdq	zmm4,zmm2,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm0,zmm1,0x96

	vmovdqu64	ZMMWORD[224+rdx],zmm4	; store H^8 (224) .. H^5 (272)
	vshufi64x2	zmm3,zmm4,zmm4,0x00	; zmm3 = H^8 in all four lanes

	; ---- {H^12..H^9} = {H^4..H^1} * H^8 ----
	vpclmulqdq	zmm0,zmm5,zmm3,0x11
	vpclmulqdq	zmm1,zmm5,zmm3,0x00
	vpclmulqdq	zmm2,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm2

	vpsrldq	zmm2,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm0,zmm0,zmm2
	vpxorq	zmm5,zmm5,zmm1



	; Reduction with POLY2.
	vmovdqu64	zmm2,ZMMWORD[POLY2]

	vpclmulqdq	zmm1,zmm2,zmm5,0x01
	vpslldq	zmm1,zmm1,8
	vpxorq	zmm5,zmm5,zmm1



	vpclmulqdq	zmm1,zmm2,zmm5,0x00
	vpsrldq	zmm1,zmm1,4
	vpclmulqdq	zmm5,zmm2,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm0,zmm1,0x96

	vmovdqu64	ZMMWORD[160+rdx],zmm5	; store H^12 (160) .. H^9 (208)

	; ---- {H^16..H^13} = {H^8..H^5} * H^8 ----
	vpclmulqdq	zmm0,zmm4,zmm3,0x11
	vpclmulqdq	zmm1,zmm4,zmm3,0x00
	vpclmulqdq	zmm2,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm2

	vpsrldq	zmm2,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm0,zmm0,zmm2
	vpxorq	zmm4,zmm4,zmm1



	; Reduction with POLY2.
	vmovdqu64	zmm2,ZMMWORD[POLY2]

	vpclmulqdq	zmm1,zmm2,zmm4,0x01
	vpslldq	zmm1,zmm1,8
	vpxorq	zmm4,zmm4,zmm1



	vpclmulqdq	zmm1,zmm2,zmm4,0x00
	vpsrldq	zmm1,zmm1,4
	vpclmulqdq	zmm4,zmm2,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm0,zmm1,0x96

	vmovdqu64	ZMMWORD[96+rdx],zmm4	; store H^16 (96) .. H^13 (144)
	; vzeroupper clears bits 128 and up of zmm0-zmm15 only.
	; NOTE(review): the low 128 bits of xmm0-xmm5 and all of zmm16
	; still hold key-derived values on return -- confirm this is
	; acceptable for the callers.
	vzeroupper
; No branch inside the visible part of this function targets this label.
$L$abort_init:
	DB	0F3h,0C3h		;repret


global	ossl_aes_gcm_setiv_avx512

ALIGN	32
ossl_aes_gcm_setiv_avx512:

$L$setiv_seh_begin:
DB	243,15,30,250
	push	rbx

$L$setiv_seh_push_rbx:
	push	rbp

$L$setiv_seh_push_rbp:
	push	r12

$L$setiv_seh_push_r12:
	push	r13

$L$setiv_seh_push_r13:
	push	r14

$L$setiv_seh_push_r14:
	push	r15

$L$setiv_seh_push_r15:
	push	rdi
$L$setiv_seh_push_rdi:
	push	rsi
$L$setiv_seh_push_rsi:

	sub	rsp,168
$L$setiv_seh_allocstack_xmm:










	lea	rbp,[160+rsp]

$L$setiv_seh_setfp:
	vmovdqu	XMMWORD[rsp],xmm6
$L$setiv_seh_save_xmm6:
	vmovdqu	XMMWORD[16+rsp],xmm7
$L$setiv_seh_save_xmm7:
	vmovdqu	XMMWORD[32+rsp],xmm8
$L$setiv_seh_save_xmm8:
	vmovdqu	XMMWORD[48+rsp],xmm9
$L$setiv_seh_save_xmm9:
	vmovdqu	XMMWORD[64+rsp],xmm10
$L$setiv_seh_save_xmm10:
	vmovdqu	XMMWORD[80+rsp],xmm11
$L$setiv_seh_save_xmm11:
	vmovdqu	XMMWORD[96+rsp],xmm12
$L$setiv_seh_save_xmm12:
	vmovdqu	XMMWORD[112+rsp],xmm13
$L$setiv_seh_save_xmm13:
	vmovdqu	XMMWORD[128+rsp],xmm14
$L$setiv_seh_save_xmm14:
	vmovdqu	XMMWORD[144+rsp],xmm15
$L$setiv_seh_save_xmm15:

$L$setiv_seh_prolog_end:
	sub	rsp,816
	and	rsp,(-64)
	cmp	r9,12
	je	NEAR iv_len_12_init_IV
	vpxor	xmm2,xmm2,xmm2
	mov	r10,r8
	mov	r11,r9
	or	r11,r11
	jz	NEAR $L$_CALC_AAD_done_1

	xor	rbx,rbx
	vmovdqa64	zmm16,ZMMWORD[SHUF_MASK]

$L$_get_AAD_loop48x16_1:
	cmp	r11,768
	jl	NEAR $L$_exit_AAD_loop48x16_1
	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	test	rbx,rbx
	jnz	NEAR $L$_skip_hkeys_precomputation_2

	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vmovdqu64	ZMMWORD[704+rsp],zmm1

	vmovdqu64	zmm9,ZMMWORD[224+rdx]
	vmovdqu64	ZMMWORD[640+rsp],zmm9


	vshufi64x2	zmm9,zmm9,zmm9,0x00

	vmovdqu64	zmm10,ZMMWORD[160+rdx]
	vmovdqu64	ZMMWORD[576+rsp],zmm10

	vmovdqu64	zmm12,ZMMWORD[96+rdx]
	vmovdqu64	ZMMWORD[512+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[192+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[128+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[64+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[rsp],zmm12
$L$_skip_hkeys_precomputation_2:
	mov	rbx,1
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm19,ZMMWORD[rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[64+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[128+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[192+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm11,ZMMWORD[256+r10]
	vmovdqu64	zmm3,ZMMWORD[320+r10]
	vmovdqu64	zmm4,ZMMWORD[384+r10]
	vmovdqu64	zmm5,ZMMWORD[448+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[256+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[320+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[384+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[448+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm11,ZMMWORD[512+r10]
	vmovdqu64	zmm3,ZMMWORD[576+r10]
	vmovdqu64	zmm4,ZMMWORD[640+r10]
	vmovdqu64	zmm5,ZMMWORD[704+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[512+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[576+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[640+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[704+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm2,xmm10,xmm1,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm9,xmm6,0x96

	sub	r11,768
	je	NEAR $L$_CALC_AAD_done_1

	add	r10,768
	jmp	NEAR $L$_get_AAD_loop48x16_1

$L$_exit_AAD_loop48x16_1:

	cmp	r11,512
	jl	NEAR $L$_less_than_32x16_1

	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	test	rbx,rbx
	jnz	NEAR $L$_skip_hkeys_precomputation_3

	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vmovdqu64	ZMMWORD[704+rsp],zmm1

	vmovdqu64	zmm9,ZMMWORD[224+rdx]
	vmovdqu64	ZMMWORD[640+rsp],zmm9


	vshufi64x2	zmm9,zmm9,zmm9,0x00

	vmovdqu64	zmm10,ZMMWORD[160+rdx]
	vmovdqu64	ZMMWORD[576+rsp],zmm10

	vmovdqu64	zmm12,ZMMWORD[96+rdx]
	vmovdqu64	ZMMWORD[512+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm12
$L$_skip_hkeys_precomputation_3:
	mov	rbx,1
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm19,ZMMWORD[256+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[320+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[384+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[448+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm11,ZMMWORD[256+r10]
	vmovdqu64	zmm3,ZMMWORD[320+r10]
	vmovdqu64	zmm4,ZMMWORD[384+r10]
	vmovdqu64	zmm5,ZMMWORD[448+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[512+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[576+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[640+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[704+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm2,xmm10,xmm1,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm9,xmm6,0x96

	sub	r11,512
	je	NEAR $L$_CALC_AAD_done_1

	add	r10,512
	jmp	NEAR $L$_less_than_16x16_1

$L$_less_than_32x16_1:
	cmp	r11,256
	jl	NEAR $L$_less_than_16x16_1

	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm19,ZMMWORD[96+rdx]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[160+rdx]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[224+rdx]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[288+rdx]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm2,xmm10,xmm1,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm9,xmm6,0x96

	sub	r11,256
	je	NEAR $L$_CALC_AAD_done_1

	add	r10,256

$L$_less_than_16x16_1:

	lea	r12,[byte64_len_to_mask_table]
	lea	r12,[r11*8+r12]


	add	r11d,15
	shr	r11d,4
	cmp	r11d,2
	jb	NEAR $L$_AAD_blocks_1_1
	je	NEAR $L$_AAD_blocks_2_1
	cmp	r11d,4
	jb	NEAR $L$_AAD_blocks_3_1
	je	NEAR $L$_AAD_blocks_4_1
	cmp	r11d,6
	jb	NEAR $L$_AAD_blocks_5_1
	je	NEAR $L$_AAD_blocks_6_1
	cmp	r11d,8
	jb	NEAR $L$_AAD_blocks_7_1
	je	NEAR $L$_AAD_blocks_8_1
	cmp	r11d,10
	jb	NEAR $L$_AAD_blocks_9_1
	je	NEAR $L$_AAD_blocks_10_1
	cmp	r11d,12
	jb	NEAR $L$_AAD_blocks_11_1
	je	NEAR $L$_AAD_blocks_12_1
	cmp	r11d,14
	jb	NEAR $L$_AAD_blocks_13_1
	je	NEAR $L$_AAD_blocks_14_1
	cmp	r11d,15
	je	NEAR $L$_AAD_blocks_15_1
$L$_AAD_blocks_16_1:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	zmm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm15,ZMMWORD[96+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[160+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[224+rdx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm1,zmm11,zmm9,0x96
	vpternlogq	zmm6,zmm3,zmm10,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm7,zmm11,zmm12,0x96
	vpternlogq	zmm8,zmm3,zmm13,0x96
	vmovdqu64	zmm15,ZMMWORD[288+rdx]
	vpclmulqdq	zmm9,zmm5,zmm15,0x11
	vpclmulqdq	zmm10,zmm5,zmm15,0x00
	vpclmulqdq	zmm12,zmm5,zmm15,0x01
	vpclmulqdq	zmm13,zmm5,zmm15,0x10
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13

	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_15_1:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	zmm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm15,ZMMWORD[112+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[176+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[240+rdx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	ymm15,YMMWORD[304+rdx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm7,zmm5,zmm15,0x01
	vpclmulqdq	zmm8,zmm5,zmm15,0x10
	vpclmulqdq	zmm1,zmm5,zmm15,0x11
	vpclmulqdq	zmm6,zmm5,zmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_14_1:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	ymm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	ymm5,ymm5,ymm16
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm15,ZMMWORD[128+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[192+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[256+rdx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	ymm15,YMMWORD[320+rdx]
	vpclmulqdq	ymm7,ymm5,ymm15,0x01
	vpclmulqdq	ymm8,ymm5,ymm15,0x10
	vpclmulqdq	ymm1,ymm5,ymm15,0x11
	vpclmulqdq	ymm6,ymm5,ymm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_13_1:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	xmm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	xmm5,xmm5,xmm16
	vpxorq	zmm11,zmm11,zmm2
	vmovdqu64	zmm15,ZMMWORD[144+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[208+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[272+rdx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	xmm15,XMMWORD[336+rdx]
	vpclmulqdq	xmm7,xmm5,xmm15,0x01
	vpclmulqdq	xmm8,xmm5,xmm15,0x10
	vpclmulqdq	xmm1,xmm5,xmm15,0x11
	vpclmulqdq	xmm6,xmm5,xmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_12_1:
	; Tail handler for up to 192 bytes (twelve 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm4,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; stepping back 1024 bytes (128 entries) would account for the 128
	; bytes loaded unmasked below -- confirm against the code that sets r12.
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	; Last 64 bytes are loaded under k1 with zeroing, so bytes outside the
	; mask are not read and become 0.
	vmovdqu8	zmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Carry-less multiply each data block by its own table entry, taken from
	; [160+rdx]..[351+rdx]. imm 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vmovdqu64	zmm15,ZMMWORD[160+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[224+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[288+rdx]
	; zmm11/zmm3 are reused as temporaries from here on.
	; vpternlogq with imm 0x96 is a three-input XOR, used to accumulate the
	; partial products of all three groups into zmm9/zmm10/zmm12/zmm13.
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined
	; elsewhere in the file); the aligned load requires POLY2 to be 16-byte aligned.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase: fold the low qword of xmm6 back into xmm6 -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase: two more multiplies by POLY2, then XOR with the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_11_1:
	; Tail handler for up to 176 bytes (eleven 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm4,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -1024 (128 entries) would match the 128 unmasked bytes -- confirm.
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	; Masked, zeroing load of the last (partial) 64 bytes.
	vmovdqu8	zmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-8 by the table entries at [176+rdx]..[303+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[176+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[240+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	; Accumulate the first two groups.
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	; Build a three-entry multiplier: the ymm load fills lanes 0-1 from
	; [304+rdx] and zeroes lanes 2-3, then lane 2 is filled from [336+rdx].
	; Lane 3 stays zero, so the fourth lane's products are zero.
	vmovdqu64	ymm15,YMMWORD[304+rdx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm7,zmm4,zmm15,0x01
	vpclmulqdq	zmm8,zmm4,zmm15,0x10
	vpclmulqdq	zmm1,zmm4,zmm15,0x11
	vpclmulqdq	zmm6,zmm4,zmm15,0x00

	; Add in the accumulated products of the first two groups.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_10_1:
	; Tail handler for up to 160 bytes (ten 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm4,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -1024 (128 entries) would match the 128 unmasked bytes -- confirm.
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	; Only 32 bytes remain in the last group: masked, zeroing ymm load.
	vmovdqu8	ymm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	ymm4,ymm4,ymm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-8 by the table entries at [192+rdx]..[319+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[192+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[256+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	; Accumulate the first two groups.
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	; Last two blocks use the entries at [320+rdx] and [336+rdx]. The ymm
	; destinations have their upper 256 bits zeroed, so the zmm-wide XORs
	; below are safe.
	vmovdqu64	ymm15,YMMWORD[320+rdx]
	vpclmulqdq	ymm7,ymm4,ymm15,0x01
	vpclmulqdq	ymm8,ymm4,ymm15,0x10
	vpclmulqdq	ymm1,ymm4,ymm15,0x11
	vpclmulqdq	ymm6,ymm4,ymm15,0x00

	; Add in the accumulated products of the first two groups.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_9_1:
	; Tail handler for up to 144 bytes (nine 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm4,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -1024 (128 entries) would match the 128 unmasked bytes -- confirm.
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	; Only one (possibly partial) block remains: masked, zeroing xmm load.
	vmovdqu8	xmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	xmm4,xmm4,xmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-8 by the table entries at [208+rdx]..[335+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[208+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[272+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	; Accumulate the first two groups.
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	; Ninth block uses the entry at [336+rdx]. The xmm destinations have
	; their upper bits zeroed, so the zmm-wide XORs below are safe.
	vmovdqu64	xmm15,XMMWORD[336+rdx]
	vpclmulqdq	xmm7,xmm4,xmm15,0x01
	vpclmulqdq	xmm8,xmm4,xmm15,0x10
	vpclmulqdq	xmm1,xmm4,xmm15,0x11
	vpclmulqdq	xmm6,xmm4,xmm15,0x00

	; Add in the accumulated products of the first two groups.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_8_1:
	; Tail handler for up to 128 bytes (eight 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -512 (64 entries) would match the 64 unmasked bytes -- confirm.
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	; Second 64 bytes are loaded under k1 with zeroing.
	vmovdqu8	zmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply each block by its table entry from [224+rdx]..[351+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[224+rdx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[288+rdx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	; Accumulate both groups.
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_7_1:
	; Tail handler for up to 112 bytes (seven 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -512 (64 entries) would match the 64 unmasked bytes -- confirm.
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	; Second 64 bytes are loaded under k1 with zeroing.
	vmovdqu8	zmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-4 by the table entries at [240+rdx]..[303+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[240+rdx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	; Build a three-entry multiplier: the ymm load fills lanes 0-1 from
	; [304+rdx] and zeroes lanes 2-3, then lane 2 is filled from [336+rdx].
	; Lane 3 stays zero, so the fourth lane's products are zero.
	vmovdqu64	ymm15,YMMWORD[304+rdx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm7,zmm3,zmm15,0x01
	vpclmulqdq	zmm8,zmm3,zmm15,0x10
	vpclmulqdq	zmm1,zmm3,zmm15,0x11
	vpclmulqdq	zmm6,zmm3,zmm15,0x00

	; Add in the products of the first group.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_6_1:
	; Tail handler for up to 96 bytes (six 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -512 (64 entries) would match the 64 unmasked bytes -- confirm.
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	; Only 32 bytes remain in the second group: masked, zeroing ymm load.
	vmovdqu8	ymm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	ymm3,ymm3,ymm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-4 by the table entries at [256+rdx]..[319+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[256+rdx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	; Last two blocks use the entries at [320+rdx] and [336+rdx]. The ymm
	; destinations have their upper 256 bits zeroed, so the zmm-wide XORs
	; below are safe.
	vmovdqu64	ymm15,YMMWORD[320+rdx]
	vpclmulqdq	ymm7,ymm3,ymm15,0x01
	vpclmulqdq	ymm8,ymm3,ymm15,0x10
	vpclmulqdq	ymm1,ymm3,ymm15,0x11
	vpclmulqdq	ymm6,ymm3,ymm15,0x00

	; Add in the products of the first group.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_5_1:
	; Tail handler for up to 80 bytes (five 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm3,zmm6-zmm13,zmm15,k1.
	; NOTE(review): r12 presumably indexes a table of 8-byte masks by remaining
	; length; -512 (64 entries) would match the 64 unmasked bytes -- confirm.
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	; Only one (possibly partial) block remains: masked, zeroing xmm load.
	vmovdqu8	xmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	xmm3,xmm3,xmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply blocks 1-4 by the table entries at [272+rdx]..[335+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[272+rdx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	; Fifth block uses the entry at [336+rdx]. The xmm destinations have
	; their upper bits zeroed, so the zmm-wide XORs below are safe.
	vmovdqu64	xmm15,XMMWORD[336+rdx]
	vpclmulqdq	xmm7,xmm3,xmm15,0x01
	vpclmulqdq	xmm8,xmm3,xmm15,0x10
	vpclmulqdq	xmm1,xmm3,xmm15,0x11
	vpclmulqdq	xmm6,xmm3,xmm15,0x00

	; Add in the products of the first group.
	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_4_1:
	; Tail handler for up to 64 bytes (four 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm6-zmm13,zmm15,k1.
	; Unlike the larger cases, r12 is used as-is: every byte is loaded masked.
	kmovq	k1,[r12]
	vmovdqu8	zmm11{k1}{z},[r10]
	vpshufb	zmm11,zmm11,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply each block by its table entry from [288+rdx]..[351+rdx].
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vmovdqu64	zmm15,ZMMWORD[288+rdx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_3_1:
	; Tail handler for up to 48 bytes (three 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), zmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm6-zmm8,zmm11-zmm13,zmm15,k1.
	kmovq	k1,[r12]
	vmovdqu8	zmm11{k1}{z},[r10]
	vpshufb	zmm11,zmm11,zmm16
	; Fold the running hash into the first data block.
	vpxorq	zmm11,zmm11,zmm2
	; Build a three-entry multiplier: the ymm load fills lanes 0-1 from
	; [304+rdx] and zeroes lanes 2-3, then lane 2 is filled from [336+rdx].
	; Lane 3 stays zero, so the fourth lane's products are zero.
	vmovdqu64	ymm15,YMMWORD[304+rdx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rdx],2
	; imm 0x01/0x10 = cross products, 0x11 = high*high, 0x00 = low*low.
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the four 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_2_1:
	; Tail handler for up to 32 bytes (two 16-byte blocks) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), ymm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm6-zmm8,zmm11-zmm13,zmm15,k1.
	kmovq	k1,[r12]
	vmovdqu8	ymm11{k1}{z},[r10]
	vpshufb	ymm11,ymm11,ymm16
	; Fold the running hash into the first data block. The XOR is zmm-wide,
	; but only ymm11 is consumed by the multiplies below.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply by the table entries at [320+rdx] and [336+rdx]. The ymm
	; destinations have their upper 256 bits zeroed, so the zmm-wide
	; operations that follow are safe.
	vmovdqu64	ymm15,YMMWORD[320+rdx]
	vpclmulqdq	ymm7,ymm11,ymm15,0x01
	vpclmulqdq	ymm8,ymm11,ymm15,0x10
	vpclmulqdq	ymm1,ymm11,ymm15,0x11
	vpclmulqdq	ymm6,ymm11,ymm15,0x00

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; XOR the 128-bit lanes together: zmm1 -> xmm1, zmm6 -> xmm6.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_1
$L$_AAD_blocks_1_1:
	; Tail handler for up to 16 bytes (a single block) at [r10].
	; In:  r10 = data pointer, rdx = base of the 16-byte multiplier table,
	;      zmm2 = running hash (low lane), xmm16 = byte-shuffle pattern,
	;      r12 = pointer to a 64-bit byte mask (all set up before this section).
	; Out: xmm2 = updated hash. Scratch: zmm1,zmm6-zmm8,zmm11-zmm13,zmm15,k1.
	; This is the last case, so it falls through to the label that follows
	; instead of jumping.
	kmovq	k1,[r12]
	vmovdqu8	xmm11{k1}{z},[r10]
	vpshufb	xmm11,xmm11,xmm16
	; Fold the running hash into the data block. The XOR is zmm-wide, but
	; only xmm11 is consumed by the multiplies below.
	vpxorq	zmm11,zmm11,zmm2
	; Multiply by the table entry at [336+rdx]. The xmm destinations have
	; their upper bits zeroed, so the zmm-wide operations that follow are safe.
	vmovdqu64	xmm15,XMMWORD[336+rdx]
	vpclmulqdq	xmm7,xmm11,xmm15,0x01
	vpclmulqdq	xmm8,xmm11,xmm15,0x10
	vpclmulqdq	xmm1,xmm11,xmm15,0x11
	vpclmulqdq	xmm6,xmm11,xmm15,0x00

	; Combine the cross products and split them across the high (zmm1) and
	; low (zmm6) halves; the byte shifts act within each 128-bit lane.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	; Lane fold kept for symmetry with the wider cases; the upper lanes are
	; zero here, so xmm1/xmm6 are unchanged by it.
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	; Reduce xmm1:xmm6 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqa64	xmm15,XMMWORD[POLY2]


	; First phase of the reduction -> xmm7.
	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	; Second phase; the final three-way XOR (0x96) merges in the high half.
	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm2,xmm15,xmm7,0x10
	vpslldq	xmm2,xmm2,4
	vpternlogq	xmm2,xmm8,xmm1,0x96

$L$_CALC_AAD_done_1:
	; Common exit of the per-length handlers: mix a length value into the
	; hash in xmm2, do one more multiply-and-reduce, and shuffle the result.
	; NOTE(review): r9 is presumably the IV length in bytes, making r10 the
	; length in bits as in the GCM derivation of the initial counter block
	; for IVs that are not 12 bytes long -- confirm against the entry code.
	mov	r10,r9
	shl	r10,3
	; vmovq places r10 in the low qword of xmm3 and zeroes the high qword.
	vmovq	xmm3,r10


	vpxorq	xmm2,xmm3,xmm2

	; Multiply xmm2 by the table entry at [336+rdx].
	vmovdqu64	xmm1,XMMWORD[336+rdx]

	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products.
	vpclmulqdq	xmm11,xmm2,xmm1,0x11
	vpclmulqdq	xmm3,xmm2,xmm1,0x00
	vpclmulqdq	xmm4,xmm2,xmm1,0x01
	vpclmulqdq	xmm2,xmm2,xmm1,0x10
	vpxorq	xmm2,xmm2,xmm4

	; Split the combined cross product: its upper qword goes into the high
	; half (xmm11), its lower qword into the low half (xmm2).
	vpsrldq	xmm4,xmm2,8
	vpslldq	xmm2,xmm2,8
	vpxorq	xmm11,xmm11,xmm4
	vpxorq	xmm2,xmm2,xmm3



	; Reduce xmm11:xmm2 to 128 bits using the POLY2 constant (defined elsewhere).
	vmovdqu64	xmm4,XMMWORD[POLY2]

	; First phase of the reduction.
	vpclmulqdq	xmm3,xmm4,xmm2,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm2,xmm2,xmm3



	; Second phase.
	vpclmulqdq	xmm3,xmm4,xmm2,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm2,xmm4,xmm2,0x10
	vpslldq	xmm2,xmm2,4

	; Three-way XOR (0x96) merges both reduction terms with the high half.
	vpternlogq	xmm2,xmm11,xmm3,0x96

	; Byte-shuffle the result with SHUF_MASK, then skip the 12-byte fast path.
	vpshufb	xmm2,xmm2,XMMWORD[SHUF_MASK]
	jmp	NEAR skip_iv_len_12_init_IV
iv_len_12_init_IV:
	; Fast path: build the 16-byte block in xmm2 directly from 12 input bytes.
	; Start from the ONEf constant (defined elsewhere), then overwrite its
	; low 12 bytes with the bytes at [r8]; the top 4 bytes of ONEf remain.
	; NOTE(review): r8 is presumably the IV pointer (third Win64 argument) --
	; confirm against the function entry, which is not visible here.

	vmovdqu8	xmm2,XMMWORD[ONEf]
	mov	r11,r8
	; 0xfff = byte mask selecting bytes 0..11; writing r10d zero-extends
	; into r10, so the 64-bit kmovq sees exactly these 12 bits.
	mov	r10d,0x0000000000000fff
	kmovq	k1,r10
	; Merge-masked load: only the 12 selected bytes are read and replaced.
	vmovdqu8	xmm2{k1},[r11]
skip_iv_len_12_init_IV:
	; Both IV paths join here with the block in xmm2. Copy it to xmm1 (which
	; the AES sequences below encrypt) so xmm2 keeps the plain value.
	vmovdqu	xmm1,xmm2


	; Dispatch on the dword at [240+rcx]: 9, 11 and 13 select the sequences
	; labelled aes_128, aes_192 and aes_256. Any other value skips the
	; encryption entirely and leaves xmm1 equal to xmm2.
	; NOTE(review): rcx is presumably the AES key schedule and offset 240 its
	; round-count field -- confirm against the key structure definition.
	mov	r10d,DWORD[240+rcx]
	cmp	r10d,9
	je	NEAR $L$aes_128_4
	cmp	r10d,11
	je	NEAR $L$aes_192_4
	cmp	r10d,13
	je	NEAR $L$aes_256_4
	jmp	NEAR $L$exit_aes_4
ALIGN	32
$L$aes_128_4:
	; Encrypt the single block in xmm1 with the round keys at [rcx],
	; 16 bytes apart: initial key XOR, 9 full rounds, then the final round
	; with the key at [160+rcx].
	vpxorq	xmm1,xmm1,XMMWORD[rcx]

	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]

	; Final round.
	vaesenclast	xmm1,xmm1,XMMWORD[160+rcx]
	jmp	NEAR $L$exit_aes_4
ALIGN	32
$L$aes_192_4:
	; Encrypt the single block in xmm1 with the round keys at [rcx],
	; 16 bytes apart: initial key XOR, 11 full rounds, then the final round
	; with the key at [192+rcx].
	vpxorq	xmm1,xmm1,XMMWORD[rcx]

	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[160+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[176+rcx]

	; Final round.
	vaesenclast	xmm1,xmm1,XMMWORD[192+rcx]
	jmp	NEAR $L$exit_aes_4
ALIGN	32
$L$aes_256_4:
	; Encrypt the single block in xmm1 with the round keys at [rcx],
	; 16 bytes apart: initial key XOR, 13 full rounds, then the final round
	; with the key at [224+rcx].
	vpxorq	xmm1,xmm1,XMMWORD[rcx]

	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[160+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[176+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[192+rcx]

	vaesenc	xmm1,xmm1,XMMWORD[208+rcx]

	; Final round.
	vaesenclast	xmm1,xmm1,XMMWORD[224+rcx]
	; This jump targets the label on the very next line; it is redundant but
	; matches the shape of the other two key-size sequences.
	jmp	NEAR $L$exit_aes_4
$L$exit_aes_4:
	; Store the results through rdx: the encrypted block (xmm1) at offset 32
	; and the byte-shuffled plain block (xmm2) at offset 0.
	; NOTE(review): rdx is presumably the GCM context, with the encrypted
	; initial counter at +32 and the current counter at +0 -- confirm against
	; the context structure definition.

	vmovdqu	XMMWORD[32+rdx],xmm1


	vpshufb	xmm2,xmm2,XMMWORD[SHUF_MASK]
	vmovdqu	XMMWORD[rdx],xmm2
	; Unsigned compare: the stack wipe below runs only when r9 > 256.
	; NOTE(review): presumably the stack scratch area only holds sensitive
	; data on the long-input path -- confirm against the code that fills it.
	cmp	r9,256
	jbe	NEAR $L$skip_hkeys_cleanup_5
	; The VEX-encoded vpxor zeroes all of zmm0, which is then stored over
	; the 768 bytes at [rsp]..[rsp+767]. vmovdqa64 requires rsp to be
	; 64-byte aligned (established earlier in the function, not visible here).
	vpxor	xmm0,xmm0,xmm0
	vmovdqa64	ZMMWORD[rsp],zmm0
	vmovdqa64	ZMMWORD[64+rsp],zmm0
	vmovdqa64	ZMMWORD[128+rsp],zmm0
	vmovdqa64	ZMMWORD[192+rsp],zmm0
	vmovdqa64	ZMMWORD[256+rsp],zmm0
	vmovdqa64	ZMMWORD[320+rsp],zmm0
	vmovdqa64	ZMMWORD[384+rsp],zmm0
	vmovdqa64	ZMMWORD[448+rsp],zmm0
	vmovdqa64	ZMMWORD[512+rsp],zmm0
	vmovdqa64	ZMMWORD[576+rsp],zmm0
	vmovdqa64	ZMMWORD[640+rsp],zmm0
	vmovdqa64	ZMMWORD[704+rsp],zmm0
$L$skip_hkeys_cleanup_5:
	; Function epilogue. vzeroupper clears the upper bits of the vector
	; registers before returning to the caller.
	vzeroupper
	; Restore xmm6-xmm15 (callee-saved under the Microsoft x64 convention)
	; from the save area addressed relative to rbp.
	vmovdqu	xmm15,XMMWORD[((-16))+rbp]
	vmovdqu	xmm14,XMMWORD[((-32))+rbp]
	vmovdqu	xmm13,XMMWORD[((-48))+rbp]
	vmovdqu	xmm12,XMMWORD[((-64))+rbp]
	vmovdqu	xmm11,XMMWORD[((-80))+rbp]
	vmovdqu	xmm10,XMMWORD[((-96))+rbp]
	vmovdqu	xmm9,XMMWORD[((-112))+rbp]
	vmovdqu	xmm8,XMMWORD[((-128))+rbp]
	vmovdqu	xmm7,XMMWORD[((-144))+rbp]
	vmovdqu	xmm6,XMMWORD[((-160))+rbp]
	; Drop the whole frame by resetting rsp from rbp, then pop the saved
	; general-purpose registers.
	; NOTE(review): assumes the prologue (not visible here) pushed
	; rbx, rbp, r12, r13, r14, r15, rdi, rsi in that order and set rbp 8 bytes
	; below the last push -- confirm against the function entry.
	lea	rsp,[8+rbp]
	pop	rsi

	pop	rdi

	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbp

	pop	rbx

	; NOTE(review): abort_setiv is presumably reached from early exits taken
	; before any register was pushed, hence it sits after the pops -- confirm.
$L$abort_setiv:
	DB	0F3h,0C3h		;repret
$L$setiv_seh_end:


global	ossl_aes_gcm_update_aad_avx512

ALIGN	32
ossl_aes_gcm_update_aad_avx512:

$L$ghash_seh_begin:
DB	243,15,30,250
	push	rbx

$L$ghash_seh_push_rbx:
	push	rbp

$L$ghash_seh_push_rbp:
	push	r12

$L$ghash_seh_push_r12:
	push	r13

$L$ghash_seh_push_r13:
	push	r14

$L$ghash_seh_push_r14:
	push	r15

$L$ghash_seh_push_r15:
	push	rdi
$L$ghash_seh_push_rdi:
	push	rsi
$L$ghash_seh_push_rsi:

	sub	rsp,168
$L$ghash_seh_allocstack_xmm:










	lea	rbp,[160+rsp]

$L$ghash_seh_setfp:
	vmovdqu	XMMWORD[rsp],xmm6
$L$ghash_seh_save_xmm6:
	vmovdqu	XMMWORD[16+rsp],xmm7
$L$ghash_seh_save_xmm7:
	vmovdqu	XMMWORD[32+rsp],xmm8
$L$ghash_seh_save_xmm8:
	vmovdqu	XMMWORD[48+rsp],xmm9
$L$ghash_seh_save_xmm9:
	vmovdqu	XMMWORD[64+rsp],xmm10
$L$ghash_seh_save_xmm10:
	vmovdqu	XMMWORD[80+rsp],xmm11
$L$ghash_seh_save_xmm11:
	vmovdqu	XMMWORD[96+rsp],xmm12
$L$ghash_seh_save_xmm12:
	vmovdqu	XMMWORD[112+rsp],xmm13
$L$ghash_seh_save_xmm13:
	vmovdqu	XMMWORD[128+rsp],xmm14
$L$ghash_seh_save_xmm14:
	vmovdqu	XMMWORD[144+rsp],xmm15
$L$ghash_seh_save_xmm15:

$L$ghash_seh_prolog_end:
	sub	rsp,816
	and	rsp,(-64)
	vmovdqu64	xmm14,XMMWORD[64+rcx]
	mov	r10,rdx
	mov	r11,r8
	or	r11,r11
	jz	NEAR $L$_CALC_AAD_done_6

	xor	rbx,rbx
	vmovdqa64	zmm16,ZMMWORD[SHUF_MASK]

$L$_get_AAD_loop48x16_6:
	cmp	r11,768
	jl	NEAR $L$_exit_AAD_loop48x16_6
	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	test	rbx,rbx
	jnz	NEAR $L$_skip_hkeys_precomputation_7

	vmovdqu64	zmm1,ZMMWORD[288+rcx]
	vmovdqu64	ZMMWORD[704+rsp],zmm1

	vmovdqu64	zmm9,ZMMWORD[224+rcx]
	vmovdqu64	ZMMWORD[640+rsp],zmm9


	vshufi64x2	zmm9,zmm9,zmm9,0x00

	vmovdqu64	zmm10,ZMMWORD[160+rcx]
	vmovdqu64	ZMMWORD[576+rsp],zmm10

	vmovdqu64	zmm12,ZMMWORD[96+rcx]
	vmovdqu64	ZMMWORD[512+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[192+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[128+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[64+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[rsp],zmm12
$L$_skip_hkeys_precomputation_7:
	mov	rbx,1
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm19,ZMMWORD[rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[64+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[128+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[192+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm11,ZMMWORD[256+r10]
	vmovdqu64	zmm3,ZMMWORD[320+r10]
	vmovdqu64	zmm4,ZMMWORD[384+r10]
	vmovdqu64	zmm5,ZMMWORD[448+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[256+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[320+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[384+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[448+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm11,ZMMWORD[512+r10]
	vmovdqu64	zmm3,ZMMWORD[576+r10]
	vmovdqu64	zmm4,ZMMWORD[640+r10]
	vmovdqu64	zmm5,ZMMWORD[704+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[512+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[576+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[640+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[704+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm14,xmm10,xmm1,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm9,xmm6,0x96

	sub	r11,768
	je	NEAR $L$_CALC_AAD_done_6

	add	r10,768
	jmp	NEAR $L$_get_AAD_loop48x16_6

$L$_exit_AAD_loop48x16_6:

	cmp	r11,512
	jl	NEAR $L$_less_than_32x16_6

	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	test	rbx,rbx
	jnz	NEAR $L$_skip_hkeys_precomputation_8

	vmovdqu64	zmm1,ZMMWORD[288+rcx]
	vmovdqu64	ZMMWORD[704+rsp],zmm1

	vmovdqu64	zmm9,ZMMWORD[224+rcx]
	vmovdqu64	ZMMWORD[640+rsp],zmm9


	vshufi64x2	zmm9,zmm9,zmm9,0x00

	vmovdqu64	zmm10,ZMMWORD[160+rcx]
	vmovdqu64	ZMMWORD[576+rsp],zmm10

	vmovdqu64	zmm12,ZMMWORD[96+rcx]
	vmovdqu64	ZMMWORD[512+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm12

	vpclmulqdq	zmm13,zmm10,zmm9,0x11
	vpclmulqdq	zmm15,zmm10,zmm9,0x00
	vpclmulqdq	zmm17,zmm10,zmm9,0x01
	vpclmulqdq	zmm10,zmm10,zmm9,0x10
	vpxorq	zmm10,zmm10,zmm17

	vpsrldq	zmm17,zmm10,8
	vpslldq	zmm10,zmm10,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm10,zmm10,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm10,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm10,zmm10,zmm15



	vpclmulqdq	zmm15,zmm17,zmm10,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm10,zmm17,zmm10,0x10
	vpslldq	zmm10,zmm10,4

	vpternlogq	zmm10,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm10

	vpclmulqdq	zmm13,zmm12,zmm9,0x11
	vpclmulqdq	zmm15,zmm12,zmm9,0x00
	vpclmulqdq	zmm17,zmm12,zmm9,0x01
	vpclmulqdq	zmm12,zmm12,zmm9,0x10
	vpxorq	zmm12,zmm12,zmm17

	vpsrldq	zmm17,zmm12,8
	vpslldq	zmm12,zmm12,8
	vpxorq	zmm13,zmm13,zmm17
	vpxorq	zmm12,zmm12,zmm15



	vmovdqu64	zmm17,ZMMWORD[POLY2]

	vpclmulqdq	zmm15,zmm17,zmm12,0x01
	vpslldq	zmm15,zmm15,8
	vpxorq	zmm12,zmm12,zmm15



	vpclmulqdq	zmm15,zmm17,zmm12,0x00
	vpsrldq	zmm15,zmm15,4
	vpclmulqdq	zmm12,zmm17,zmm12,0x10
	vpslldq	zmm12,zmm12,4

	vpternlogq	zmm12,zmm13,zmm15,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm12
$L$_skip_hkeys_precomputation_8:
; 512-byte step: multiply eight 64-byte data vectors by the eight 64-byte
; table entries at [rsp+256]..[rsp+704], sum the products and reduce the sum
; to 128 bits in xmm14.
; On entry zmm11/zmm3/zmm4/zmm5 already hold the first four data vectors; they
; are prepared before this label (not visible in this chunk).
; NOTE(review): the stack entries are presumably the hash-key powers written by
; the code just above this label -- confirm against the generator.
; rbx = 1 is a flag whose consumer lies outside this chunk.
	mov	rbx,1
; Fold the running accumulator (zmm14) into the first data vector.
	vpxorq	zmm11,zmm11,zmm14
; Each data vector yields four carry-less partial products per 128-bit lane:
; 0x11 = high x high, 0x00 = low x low, 0x01 / 0x10 = the two cross products.
	vmovdqu64	zmm19,ZMMWORD[256+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[320+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
; Start the three accumulators: zmm6 = high parts, zmm8 = low parts,
; zmm7 = cross terms (vpternlogq with 0x96 is a three-input XOR).
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[384+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[448+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
; Second half: load the next 256 input bytes from r10+256 and permute their
; bytes with the shuffle control held in zmm16 (set up outside this chunk).
	vmovdqu64	zmm11,ZMMWORD[256+r10]
	vmovdqu64	zmm3,ZMMWORD[320+r10]
	vmovdqu64	zmm4,ZMMWORD[384+r10]
	vmovdqu64	zmm5,ZMMWORD[448+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vmovdqu64	zmm19,ZMMWORD[512+rsp]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[576+rsp]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[640+rsp]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[704+rsp]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

; Split the cross terms across the high (zmm6) and low (zmm8) halves, then
; XOR the four 128-bit lanes of each half together (512 -> 256 -> 128 bits).
	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
; Two-phase reduction of xmm6:xmm8 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm14,xmm10,xmm1,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm9,xmm6,0x96

; 512 bytes consumed: finish if nothing is left, otherwise advance the read
; pointer and let the tail dispatcher handle the remainder.
	sub	r11,512
	je	NEAR $L$_CALC_AAD_done_6

	add	r10,512
	jmp	NEAR $L$_less_than_16x16_6

$L$_less_than_32x16_6:
; 256-byte step: when at least 256 bytes remain (r11), absorb four 64-byte
; vectors from r10 using the 64-byte table entries at rcx+96/160/224/288 and
; reduce to 128 bits in xmm14; otherwise go straight to the tail dispatcher.
; NOTE(review): `jl` is a signed comparison applied to a byte count; it only
; differs from `jb` for counts >= 2^63 -- confirm that is intended.
	cmp	r11,256
	jl	NEAR $L$_less_than_16x16_6

	vmovdqu64	zmm11,ZMMWORD[r10]
	vmovdqu64	zmm3,ZMMWORD[64+r10]
	vmovdqu64	zmm4,ZMMWORD[128+r10]
	vmovdqu64	zmm5,ZMMWORD[192+r10]
; Permute input bytes with the shuffle control in zmm16 (set up outside this
; chunk), then fold the running accumulator zmm14 into the first vector.
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm14
; Partial products per 128-bit lane: 0x11 = high x high, 0x00 = low x low,
; 0x01 / 0x10 = the two cross products.
	vmovdqu64	zmm19,ZMMWORD[96+rcx]
	vpclmulqdq	zmm1,zmm11,zmm19,0x11
	vpclmulqdq	zmm9,zmm11,zmm19,0x00
	vpclmulqdq	zmm10,zmm11,zmm19,0x01
	vpclmulqdq	zmm12,zmm11,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[160+rcx]
	vpclmulqdq	zmm13,zmm3,zmm19,0x11
	vpclmulqdq	zmm15,zmm3,zmm19,0x00
	vpclmulqdq	zmm17,zmm3,zmm19,0x01
	vpclmulqdq	zmm18,zmm3,zmm19,0x10
; Accumulators: zmm6 = high parts, zmm8 = low parts, zmm7 = cross terms
; (vpternlogq with 0x96 is a three-input XOR).
	vpxorq	zmm7,zmm10,zmm17
	vpxorq	zmm6,zmm1,zmm13
	vpxorq	zmm8,zmm9,zmm15
	vpternlogq	zmm7,zmm12,zmm18,0x96
	vmovdqu64	zmm19,ZMMWORD[224+rcx]
	vpclmulqdq	zmm1,zmm4,zmm19,0x11
	vpclmulqdq	zmm9,zmm4,zmm19,0x00
	vpclmulqdq	zmm10,zmm4,zmm19,0x01
	vpclmulqdq	zmm12,zmm4,zmm19,0x10
	vmovdqu64	zmm19,ZMMWORD[288+rcx]
	vpclmulqdq	zmm13,zmm5,zmm19,0x11
	vpclmulqdq	zmm15,zmm5,zmm19,0x00
	vpclmulqdq	zmm17,zmm5,zmm19,0x01
	vpclmulqdq	zmm18,zmm5,zmm19,0x10

	vpternlogq	zmm7,zmm10,zmm17,0x96
	vpternlogq	zmm6,zmm1,zmm13,0x96
	vpternlogq	zmm8,zmm9,zmm15,0x96
	vpternlogq	zmm7,zmm12,zmm18,0x96

; Split the cross terms across the two halves and XOR the four 128-bit lanes
; of each half together.
	vpsrldq	zmm1,zmm7,8
	vpslldq	zmm9,zmm7,8
	vpxorq	zmm6,zmm6,zmm1
	vpxorq	zmm8,zmm8,zmm9
	vextracti64x4	ymm1,zmm6,1
	vpxorq	ymm6,ymm6,ymm1
	vextracti32x4	xmm1,ymm6,1
	vpxorq	xmm6,xmm6,xmm1
	vextracti64x4	ymm9,zmm8,1
	vpxorq	ymm8,ymm8,ymm9
	vextracti32x4	xmm9,ymm8,1
	vpxorq	xmm8,xmm8,xmm9
; Two-phase reduction of xmm6:xmm8 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm10,XMMWORD[POLY2]


	vpclmulqdq	xmm1,xmm10,xmm8,0x01
	vpslldq	xmm1,xmm1,8
	vpxorq	xmm1,xmm8,xmm1


	vpclmulqdq	xmm9,xmm10,xmm1,0x00
	vpsrldq	xmm9,xmm9,4
	vpclmulqdq	xmm14,xmm10,xmm1,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm9,xmm6,0x96

; 256 bytes consumed: finish if nothing is left, otherwise advance the read
; pointer and fall through into the tail dispatcher that follows.
	sub	r11,256
	je	NEAR $L$_CALC_AAD_done_6

	add	r10,256

$L$_less_than_16x16_6:
; Tail dispatcher. r11 holds the remaining byte count and r10 the read pointer.
; r12 is pointed at the 8-byte entry number r11 of byte64_len_to_mask_table;
; the handlers load a 64-bit byte mask from it (after stepping back by whole
; 64-byte vectors where needed).

	lea	r12,[byte64_len_to_mask_table]
	lea	r12,[r11*8+r12]


; r11d = ceil(r11d / 16), i.e. the number of 16-byte blocks, counting a
; trailing partial block as one.
	add	r11d,15
	shr	r11d,4
; Compare against even values so each cmp serves two cases: below -> odd
; count, equal -> even count. A count of 0 also takes the first `jb`.
	cmp	r11d,2
	jb	NEAR $L$_AAD_blocks_1_6
	je	NEAR $L$_AAD_blocks_2_6
	cmp	r11d,4
	jb	NEAR $L$_AAD_blocks_3_6
	je	NEAR $L$_AAD_blocks_4_6
	cmp	r11d,6
	jb	NEAR $L$_AAD_blocks_5_6
	je	NEAR $L$_AAD_blocks_6_6
	cmp	r11d,8
	jb	NEAR $L$_AAD_blocks_7_6
	je	NEAR $L$_AAD_blocks_8_6
	cmp	r11d,10
	jb	NEAR $L$_AAD_blocks_9_6
	je	NEAR $L$_AAD_blocks_10_6
	cmp	r11d,12
	jb	NEAR $L$_AAD_blocks_11_6
	je	NEAR $L$_AAD_blocks_12_6
	cmp	r11d,14
	jb	NEAR $L$_AAD_blocks_13_6
	je	NEAR $L$_AAD_blocks_14_6
; Any count other than 1..15 falls through to the 16-block handler that
; follows this dispatcher.
	cmp	r11d,15
	je	NEAR $L$_AAD_blocks_15_6
$L$_AAD_blocks_16_6:
; Final chunk spanning four ZMM vectors; reached by fall-through from the
; dispatcher when the block count is not 1..15.
; r12 was pointed at mask-table entry [remaining bytes]; stepping back 1536
; bytes (192 entries of 8 bytes) selects the entry for the bytes that land in
; the fourth vector.
	sub	r12,1536
	kmovq	k1,[r12]
; Three full vectors plus a masked, zero-filled fourth vector.
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	zmm5{k1}{z},[192+r10]
; Permute input bytes with the shuffle control in zmm16 (set up outside this
; chunk), then fold the running accumulator zmm14 into the first vector.
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm14
; Multiply by the 64-byte table entries at rcx+96/160/224/288. Selectors:
; 0x11 = high x high, 0x00 = low x low, 0x01 / 0x10 = the two cross products.
	vmovdqu64	zmm15,ZMMWORD[96+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[160+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
; zmm11/zmm3 are reused as scratch for the third vector's products; each
; vpternlogq (0x96 = three-input XOR) merges three partial products at once.
	vmovdqu64	zmm15,ZMMWORD[224+rcx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm1,zmm11,zmm9,0x96
	vpternlogq	zmm6,zmm3,zmm10,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm7,zmm11,zmm12,0x96
	vpternlogq	zmm8,zmm3,zmm13,0x96
	vmovdqu64	zmm15,ZMMWORD[288+rcx]
	vpclmulqdq	zmm9,zmm5,zmm15,0x11
	vpclmulqdq	zmm10,zmm5,zmm15,0x00
	vpclmulqdq	zmm12,zmm5,zmm15,0x01
	vpclmulqdq	zmm13,zmm5,zmm15,0x10
; Totals: zmm9 = high parts, zmm10 = low parts, zmm12/zmm13 = cross terms.
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13

; Merge the cross terms, split them across the high (zmm1) and low (zmm6)
; halves, then XOR the four 128-bit lanes of each half together.
	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
; Two-phase reduction of xmm1:xmm6 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_15_6:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	zmm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	zmm5,zmm5,zmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[112+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[176+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[240+rcx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	ymm15,YMMWORD[304+rcx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rcx],2
	vpclmulqdq	zmm7,zmm5,zmm15,0x01
	vpclmulqdq	zmm8,zmm5,zmm15,0x10
	vpclmulqdq	zmm1,zmm5,zmm15,0x11
	vpclmulqdq	zmm6,zmm5,zmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_14_6:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	ymm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	ymm5,ymm5,ymm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[128+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[192+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[256+rcx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	ymm15,YMMWORD[320+rcx]
	vpclmulqdq	ymm7,ymm5,ymm15,0x01
	vpclmulqdq	ymm8,ymm5,ymm15,0x10
	vpclmulqdq	ymm1,ymm5,ymm15,0x11
	vpclmulqdq	ymm6,ymm5,ymm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_13_6:
	sub	r12,1536
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4,ZMMWORD[128+r10]
	vmovdqu8	xmm5{k1}{z},[192+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpshufb	xmm5,xmm5,xmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[144+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[208+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[272+rcx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96
	vmovdqu64	xmm15,XMMWORD[336+rcx]
	vpclmulqdq	xmm7,xmm5,xmm15,0x01
	vpclmulqdq	xmm8,xmm5,xmm15,0x10
	vpclmulqdq	xmm1,xmm5,xmm15,0x11
	vpclmulqdq	xmm6,xmm5,xmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_12_6:
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[160+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[224+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[288+rcx]
	vpclmulqdq	zmm11,zmm4,zmm15,0x11
	vpclmulqdq	zmm3,zmm4,zmm15,0x00
	vpternlogq	zmm9,zmm11,zmm1,0x96
	vpternlogq	zmm10,zmm3,zmm6,0x96
	vpclmulqdq	zmm11,zmm4,zmm15,0x01
	vpclmulqdq	zmm3,zmm4,zmm15,0x10
	vpternlogq	zmm12,zmm11,zmm7,0x96
	vpternlogq	zmm13,zmm3,zmm8,0x96

	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_11_6:
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	zmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	zmm4,zmm4,zmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[176+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[240+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	vmovdqu64	ymm15,YMMWORD[304+rcx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rcx],2
	vpclmulqdq	zmm7,zmm4,zmm15,0x01
	vpclmulqdq	zmm8,zmm4,zmm15,0x10
	vpclmulqdq	zmm1,zmm4,zmm15,0x11
	vpclmulqdq	zmm6,zmm4,zmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_10_6:
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	ymm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	ymm4,ymm4,ymm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[192+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[256+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	vmovdqu64	ymm15,YMMWORD[320+rcx]
	vpclmulqdq	ymm7,ymm4,ymm15,0x01
	vpclmulqdq	ymm8,ymm4,ymm15,0x10
	vpclmulqdq	ymm1,ymm4,ymm15,0x11
	vpclmulqdq	ymm6,ymm4,ymm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_9_6:
	sub	r12,1024
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3,ZMMWORD[64+r10]
	vmovdqu8	xmm4{k1}{z},[128+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpshufb	xmm4,xmm4,xmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[208+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[272+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13
	vmovdqu64	xmm15,XMMWORD[336+rcx]
	vpclmulqdq	xmm7,xmm4,xmm15,0x01
	vpclmulqdq	xmm8,xmm4,xmm15,0x10
	vpclmulqdq	xmm1,xmm4,xmm15,0x11
	vpclmulqdq	xmm6,xmm4,xmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_8_6:
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[224+rcx]
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vmovdqu64	zmm15,ZMMWORD[288+rcx]
	vpclmulqdq	zmm9,zmm3,zmm15,0x11
	vpclmulqdq	zmm10,zmm3,zmm15,0x00
	vpclmulqdq	zmm12,zmm3,zmm15,0x01
	vpclmulqdq	zmm13,zmm3,zmm15,0x10
	vpxorq	zmm9,zmm1,zmm9
	vpxorq	zmm10,zmm6,zmm10
	vpxorq	zmm12,zmm7,zmm12
	vpxorq	zmm13,zmm8,zmm13

	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_7_6:
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	zmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	zmm3,zmm3,zmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[240+rcx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	vmovdqu64	ymm15,YMMWORD[304+rcx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rcx],2
	vpclmulqdq	zmm7,zmm3,zmm15,0x01
	vpclmulqdq	zmm8,zmm3,zmm15,0x10
	vpclmulqdq	zmm1,zmm3,zmm15,0x11
	vpclmulqdq	zmm6,zmm3,zmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_6_6:
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	ymm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	ymm3,ymm3,ymm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[256+rcx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	vmovdqu64	ymm15,YMMWORD[320+rcx]
	vpclmulqdq	ymm7,ymm3,ymm15,0x01
	vpclmulqdq	ymm8,ymm3,ymm15,0x10
	vpclmulqdq	ymm1,ymm3,ymm15,0x11
	vpclmulqdq	ymm6,ymm3,ymm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_5_6:
	sub	r12,512
	kmovq	k1,[r12]
	vmovdqu8	zmm11,ZMMWORD[r10]
	vmovdqu8	xmm3{k1}{z},[64+r10]
	vpshufb	zmm11,zmm11,zmm16
	vpshufb	xmm3,xmm3,xmm16
	vpxorq	zmm11,zmm11,zmm14
	vmovdqu64	zmm15,ZMMWORD[272+rcx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10
	vmovdqu64	xmm15,XMMWORD[336+rcx]
	vpclmulqdq	xmm7,xmm3,xmm15,0x01
	vpclmulqdq	xmm8,xmm3,xmm15,0x10
	vpclmulqdq	xmm1,xmm3,xmm15,0x11
	vpclmulqdq	xmm6,xmm3,xmm15,0x00

	vpxorq	zmm7,zmm7,zmm12
	vpxorq	zmm8,zmm8,zmm13
	vpxorq	zmm1,zmm1,zmm9
	vpxorq	zmm6,zmm6,zmm10

	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_4_6:
; Final chunk that fits in a single ZMM vector (block count 4).
; r12 already points at the mask-table entry for the remaining byte count, so
; no adjustment is needed before loading the byte mask.
	kmovq	k1,[r12]
; Masked, zero-filled load; permute bytes with the control in zmm16 (set up
; outside this chunk) and fold in the running accumulator zmm14.
	vmovdqu8	zmm11{k1}{z},[r10]
	vpshufb	zmm11,zmm11,zmm16
	vpxorq	zmm11,zmm11,zmm14
; Multiply by the 64-byte table entry at rcx+288. Selectors: 0x11 = high x
; high, 0x00 = low x low, 0x01 / 0x10 = the two cross products.
	vmovdqu64	zmm15,ZMMWORD[288+rcx]
	vpclmulqdq	zmm9,zmm11,zmm15,0x11
	vpclmulqdq	zmm10,zmm11,zmm15,0x00
	vpclmulqdq	zmm12,zmm11,zmm15,0x01
	vpclmulqdq	zmm13,zmm11,zmm15,0x10

; Merge the cross terms, split them across the high (zmm1) and low (zmm6)
; halves, then XOR the four 128-bit lanes of each half together.
	vpxorq	zmm12,zmm12,zmm13
	vpsrldq	zmm7,zmm12,8
	vpslldq	zmm8,zmm12,8
	vpxorq	zmm1,zmm9,zmm7
	vpxorq	zmm6,zmm10,zmm8
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
; Two-phase reduction of xmm1:xmm6 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_3_6:
; Final chunk of three 16-byte blocks (the last one possibly partial).
; r12 already points at the mask-table entry for the remaining byte count.
	kmovq	k1,[r12]
; Masked, zero-filled load; permute bytes with the control in zmm16 (set up
; outside this chunk) and fold in the running accumulator zmm14.
	vmovdqu8	zmm11{k1}{z},[r10]
	vpshufb	zmm11,zmm11,zmm16
	vpxorq	zmm11,zmm11,zmm14
; Assemble a three-entry multiplier: lanes 0-1 from [rcx+304], lane 2 from
; [rcx+336]. Lane 3 stays zero because the 256-bit load clears bits 511:256.
	vmovdqu64	ymm15,YMMWORD[304+rcx]
	vinserti64x2	zmm15,zmm15,ZMMWORD[336+rcx],2
; Selectors: 0x01 / 0x10 = cross products, 0x11 = high x high, 0x00 = low x low.
	vpclmulqdq	zmm7,zmm11,zmm15,0x01
	vpclmulqdq	zmm8,zmm11,zmm15,0x10
	vpclmulqdq	zmm1,zmm11,zmm15,0x11
	vpclmulqdq	zmm6,zmm11,zmm15,0x00

; Merge the cross terms, split them across the high (zmm1) and low (zmm6)
; halves, then XOR the four 128-bit lanes of each half together.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
; Two-phase reduction of xmm1:xmm6 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_2_6:
; Final chunk of two 16-byte blocks (the last one possibly partial), handled
; with 256-bit operations.
; r12 already points at the mask-table entry for the remaining byte count.
	kmovq	k1,[r12]
; Masked, zero-filled 256-bit load; permute bytes with the control in ymm16
; (set up outside this chunk) and fold in the running accumulator zmm14.
	vmovdqu8	ymm11{k1}{z},[r10]
	vpshufb	ymm11,ymm11,ymm16
	vpxorq	zmm11,zmm11,zmm14
; Multiply by the 32-byte table entry at rcx+320. The 256-bit forms clear
; bits 511:256 of their destinations, so the full-width folding below sees
; zeros in the unused lanes.
	vmovdqu64	ymm15,YMMWORD[320+rcx]
	vpclmulqdq	ymm7,ymm11,ymm15,0x01
	vpclmulqdq	ymm8,ymm11,ymm15,0x10
	vpclmulqdq	ymm1,ymm11,ymm15,0x11
	vpclmulqdq	ymm6,ymm11,ymm15,0x00

; Merge the cross terms, split them across the high (zmm1) and low (zmm6)
; halves, then XOR the 128-bit lanes of each half together.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
; Two-phase reduction of xmm1:xmm6 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

	jmp	NEAR $L$_CALC_AAD_done_6
$L$_AAD_blocks_1_6:
; Final chunk of at most one 16-byte block, handled with 128-bit operations.
; The dispatcher also routes a block count of 0 here.
; r12 already points at the mask-table entry for the remaining byte count.
	kmovq	k1,[r12]
; Masked, zero-filled 128-bit load; permute bytes with the control in xmm16
; (set up outside this chunk) and fold in the running accumulator zmm14.
	vmovdqu8	xmm11{k1}{z},[r10]
	vpshufb	xmm11,xmm11,xmm16
	vpxorq	zmm11,zmm11,zmm14
; Multiply by the 16-byte table entry at rcx+336. The 128-bit forms clear
; bits 511:128 of their destinations, so the full-width folding below sees
; zeros in the unused lanes.
	vmovdqu64	xmm15,XMMWORD[336+rcx]
	vpclmulqdq	xmm7,xmm11,xmm15,0x01
	vpclmulqdq	xmm8,xmm11,xmm15,0x10
	vpclmulqdq	xmm1,xmm11,xmm15,0x11
	vpclmulqdq	xmm6,xmm11,xmm15,0x00

; Merge the cross terms, split them across the high (zmm1) and low (zmm6)
; halves, then XOR the 128-bit lanes of each half together.
	vpxorq	zmm7,zmm7,zmm8
	vpsrldq	zmm12,zmm7,8
	vpslldq	zmm13,zmm7,8
	vpxorq	zmm1,zmm1,zmm12
	vpxorq	zmm6,zmm6,zmm13
	vextracti64x4	ymm12,zmm1,1
	vpxorq	ymm1,ymm1,ymm12
	vextracti32x4	xmm12,ymm1,1
	vpxorq	xmm1,xmm1,xmm12
	vextracti64x4	ymm13,zmm6,1
	vpxorq	ymm6,ymm6,ymm13
	vextracti32x4	xmm13,ymm6,1
	vpxorq	xmm6,xmm6,xmm13
; Two-phase reduction of xmm1:xmm6 using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm15,XMMWORD[POLY2]


	vpclmulqdq	xmm7,xmm15,xmm6,0x01
	vpslldq	xmm7,xmm7,8
	vpxorq	xmm7,xmm6,xmm7


	vpclmulqdq	xmm8,xmm15,xmm7,0x00
	vpsrldq	xmm8,xmm8,4
	vpclmulqdq	xmm14,xmm15,xmm7,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm8,xmm1,0x96

$L$_CALC_AAD_done_6:
; Common exit: store the 128-bit accumulator (xmm14) at [rcx+64].
	vmovdqu64	XMMWORD[64+rcx],xmm14
; Only when r8 > 256 (unsigned): overwrite the 768 bytes at rsp..rsp+767 with
; zeros before leaving.
; NOTE(review): r8 is presumably the caller's byte length, and these slots
; presumably hold key-derived values written by the long-input paths --
; confirm against the function prologue, which is outside this chunk.
	cmp	r8,256
	jbe	NEAR $L$skip_hkeys_cleanup_9
; The VEX-encoded xmm write clears the whole of zmm0. The stores use the
; aligned form, so rsp must be 64-byte aligned here (established elsewhere).
	vpxor	xmm0,xmm0,xmm0
	vmovdqa64	ZMMWORD[rsp],zmm0
	vmovdqa64	ZMMWORD[64+rsp],zmm0
	vmovdqa64	ZMMWORD[128+rsp],zmm0
	vmovdqa64	ZMMWORD[192+rsp],zmm0
	vmovdqa64	ZMMWORD[256+rsp],zmm0
	vmovdqa64	ZMMWORD[320+rsp],zmm0
	vmovdqa64	ZMMWORD[384+rsp],zmm0
	vmovdqa64	ZMMWORD[448+rsp],zmm0
	vmovdqa64	ZMMWORD[512+rsp],zmm0
	vmovdqa64	ZMMWORD[576+rsp],zmm0
	vmovdqa64	ZMMWORD[640+rsp],zmm0
	vmovdqa64	ZMMWORD[704+rsp],zmm0
$L$skip_hkeys_cleanup_9:
; Epilogue: clear the upper vector state, then reload xmm6-xmm15 (callee-saved
; under the Microsoft x64 convention) from the save area at rbp-160..rbp-1.
	vzeroupper
	vmovdqu	xmm15,XMMWORD[((-16))+rbp]
	vmovdqu	xmm14,XMMWORD[((-32))+rbp]
	vmovdqu	xmm13,XMMWORD[((-48))+rbp]
	vmovdqu	xmm12,XMMWORD[((-64))+rbp]
	vmovdqu	xmm11,XMMWORD[((-80))+rbp]
	vmovdqu	xmm10,XMMWORD[((-96))+rbp]
	vmovdqu	xmm9,XMMWORD[((-112))+rbp]
	vmovdqu	xmm8,XMMWORD[((-128))+rbp]
	vmovdqu	xmm7,XMMWORD[((-144))+rbp]
	vmovdqu	xmm6,XMMWORD[((-160))+rbp]
; Discard the frame via rbp, then pop eight general-purpose registers.
; NOTE(review): assumes this function's prologue (outside this chunk) uses the
; same layout as ossl_aes_gcm_encrypt_avx512: rbp = rsp+160 after `sub rsp,168`,
; with pushes in the reverse of the pop order below.
	lea	rsp,[8+rbp]
	pop	rsi

	pop	rdi

	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbp

	pop	rbx

$L$exit_update_aad:
	DB	0F3h,0C3h		;repret
$L$ghash_seh_end:


global	ossl_aes_gcm_encrypt_avx512

ALIGN	32
; ossl_aes_gcm_encrypt_avx512 -- entry, prologue and key-size dispatch.
; Register arguments as used by the visible code (Microsoft x64 order):
;   rcx = structure with a DWORD at +240 (9/11/13 selects the code path) and
;         16-byte entries from +0 that feed the AES round instructions
;   rdx = state structure (16-byte fields at +0, +16, +64 and +336 are used)
;   r8  = pointer to a 64-bit count of bytes carried over from a prior call
;   r9  = input pointer
;   [rbp+112], [rbp+120] = fifth and sixth (stack) arguments: byte count and
;         output pointer
; The rest of the function lies beyond this chunk, so the return value is only
; documented for the unsupported-key-size path (eax = 0).
; The $L$encrypt_seh_* labels mark prologue positions; presumably they are
; referenced by unwind data elsewhere in the file.
ossl_aes_gcm_encrypt_avx512:

$L$encrypt_seh_begin:
; F3 0F 1E FA = endbr64
DB	243,15,30,250
; Save the integer registers that are callee-saved under the Microsoft x64
; convention (eight pushes = 64 bytes).
	push	rbx

$L$encrypt_seh_push_rbx:
	push	rbp

$L$encrypt_seh_push_rbp:
	push	r12

$L$encrypt_seh_push_r12:
	push	r13

$L$encrypt_seh_push_r13:
	push	r14

$L$encrypt_seh_push_r14:
	push	r15

$L$encrypt_seh_push_r15:
	push	rdi
$L$encrypt_seh_push_rdi:
	push	rsi
$L$encrypt_seh_push_rsi:

; 160 bytes for xmm6-xmm15 plus 8 bytes of padding.
	sub	rsp,168
$L$encrypt_seh_allocstack_xmm:










; Frame pointer: rbp = rsp+160 = entry rsp - 72, which puts the fifth argument
; at [rbp+112] and the sixth at [rbp+120].
	lea	rbp,[160+rsp]

$L$encrypt_seh_setfp:
; Save xmm6-xmm15 at rsp+0..rsp+159 (that is, rbp-160..rbp-1).
	vmovdqu	XMMWORD[rsp],xmm6
$L$encrypt_seh_save_xmm6:
	vmovdqu	XMMWORD[16+rsp],xmm7
$L$encrypt_seh_save_xmm7:
	vmovdqu	XMMWORD[32+rsp],xmm8
$L$encrypt_seh_save_xmm8:
	vmovdqu	XMMWORD[48+rsp],xmm9
$L$encrypt_seh_save_xmm9:
	vmovdqu	XMMWORD[64+rsp],xmm10
$L$encrypt_seh_save_xmm10:
	vmovdqu	XMMWORD[80+rsp],xmm11
$L$encrypt_seh_save_xmm11:
	vmovdqu	XMMWORD[96+rsp],xmm12
$L$encrypt_seh_save_xmm12:
	vmovdqu	XMMWORD[112+rsp],xmm13
$L$encrypt_seh_save_xmm13:
	vmovdqu	XMMWORD[128+rsp],xmm14
$L$encrypt_seh_save_xmm14:
	vmovdqu	XMMWORD[144+rsp],xmm15
$L$encrypt_seh_save_xmm15:

$L$encrypt_seh_prolog_end:
; Reserve 1584 bytes of scratch space and align rsp down to 64 bytes.
	sub	rsp,1584
	and	rsp,(-64)


; Dispatch on the DWORD at [rcx+240]: 9, 11 and 13 select the 128-, 192- and
; 256-bit paths; any other value returns with eax = 0.
	mov	eax,DWORD[240+rcx]
	cmp	eax,9
	je	NEAR $L$aes_gcm_encrypt_128_avx512
	cmp	eax,11
	je	NEAR $L$aes_gcm_encrypt_192_avx512
	cmp	eax,13
	je	NEAR $L$aes_gcm_encrypt_256_avx512
	xor	eax,eax
	jmp	NEAR $L$exit_gcm_encrypt
ALIGN	32
$L$aes_gcm_encrypt_128_avx512:
; 128-bit path, first stage: complete a 16-byte block left unfinished by an
; earlier call, if any.
;   [rbp+112] = byte count for this call (fifth argument)
;   [rbp+120] = output pointer (sixth argument)
;   [r8]      = number of bytes already consumed in the unfinished block
;   r9        = input pointer, rdx = state structure
; On reaching $L$_partial_block_done_11, r11 = number of input bytes consumed
; by this stage.
; NOTE(review): [rdx+16] is presumably the saved keystream block, [rdx+64] the
; running hash and [rdx+336] the hash key -- confirm against the context layout.
	cmp	QWORD[112+rbp],0
	je	NEAR $L$_enc_dec_done_10
	xor	r14,r14
	vmovdqu64	xmm14,XMMWORD[64+rdx]

; Nothing carried over: skip this stage (r11 is then 0).
	mov	r11,QWORD[r8]
	or	r11,r11
	je	NEAR $L$_partial_block_done_11
; r10 = min(16, byte count), unsigned. Entries of byte_len_to_mask_table are
; 2 bytes wide, hence r10 is added twice. Load that many input bytes,
; zero-filling the rest of xmm0.
	mov	r10d,16
	lea	r12,[byte_len_to_mask_table]
	cmp	QWORD[112+rbp],r10
	cmovc	r10,QWORD[112+rbp]
	add	r12,r10
	add	r12,r10
	kmovw	k1,[r12]
	vmovdqu8	xmm0{k1}{z},[r9]

	vmovdqu64	xmm3,XMMWORD[16+rdx]
	vmovdqu64	xmm4,XMMWORD[336+rdx]



; xmm5 = shuffle control read from SHIFT_MASK + r11; it realigns the saved
; block in xmm3 with the new input before the two are XORed.
	lea	r12,[SHIFT_MASK]
	add	r12,r11
	vmovdqu64	xmm5,XMMWORD[r12]
	vpshufb	xmm3,xmm3,xmm5
	vpxorq	xmm3,xmm3,xmm0


; r13 = byte count + r11 - 16. A negative value means the block is still not
; complete after this call; r12 is then advanced by the shortfall so that the
; mask loaded below also clears the bytes that are still missing.
	mov	r13,QWORD[112+rbp]
	add	r13,r11
	sub	r13,16
	jge	NEAR $L$_no_extra_mask_11
	sub	r12,r13
$L$_no_extra_mask_11:



; Mask the result, then shuffle it (SHUF_MASK followed by xmm5) and XOR it into
; the accumulator xmm14.
	vmovdqu64	xmm0,XMMWORD[16+r12]
	vpand	xmm3,xmm3,xmm0
	vpshufb	xmm3,xmm3,XMMWORD[SHUF_MASK]
	vpshufb	xmm3,xmm3,xmm5
	vpxorq	xmm14,xmm14,xmm3
	cmp	r13,0
	jl	NEAR $L$_partial_incomplete_11

; Block complete: carry-less multiply xmm14 by xmm4 (0x11 = high x high,
; 0x00 = low x low, 0x01 / 0x10 = cross products) ...
	vpclmulqdq	xmm7,xmm14,xmm4,0x11
	vpclmulqdq	xmm10,xmm14,xmm4,0x00
	vpclmulqdq	xmm11,xmm14,xmm4,0x01
	vpclmulqdq	xmm14,xmm14,xmm4,0x10
	vpxorq	xmm14,xmm14,xmm11

	vpsrldq	xmm11,xmm14,8
	vpslldq	xmm14,xmm14,8
	vpxorq	xmm7,xmm7,xmm11
	vpxorq	xmm14,xmm14,xmm10



; ... and reduce the 256-bit product in two phases using the POLY2 constant.
	vmovdqu64	xmm11,XMMWORD[POLY2]

	vpclmulqdq	xmm10,xmm11,xmm14,0x01
	vpslldq	xmm10,xmm10,8
	vpxorq	xmm14,xmm14,xmm10



	vpclmulqdq	xmm10,xmm11,xmm14,0x00
	vpsrldq	xmm10,xmm10,4
	vpclmulqdq	xmm14,xmm11,xmm14,0x10
	vpslldq	xmm14,xmm14,4

; 0x96 = three-input XOR.
	vpternlogq	xmm14,xmm7,xmm10,0x96

; No unfinished block remains.
	mov	QWORD[r8],0

; r11 = 16 - (previous carried-over count) = bytes consumed from the input.
	mov	r12,r11
	mov	r11,16
	sub	r11,r12
	jmp	NEAR $L$_enc_dec_done_11

$L$_partial_incomplete_11:
; Block still unfinished: add the whole byte count to the carried-over count;
; all input bytes were consumed.
	mov	r12,QWORD[112+rbp]
	add	QWORD[r8],r12
	mov	r11,QWORD[112+rbp]

$L$_enc_dec_done_11:


; k1 = byte mask for r11 bytes (2-byte table entries); write the accumulator
; back to [rdx+64].
	lea	r12,[byte_len_to_mask_table]
	kmovw	k1,[r11*2+r12]
	vmovdqu64	XMMWORD[64+rdx],xmm14

; Apply the two shuffles again and store r11 bytes to the output pointer.
	vpshufb	xmm3,xmm3,XMMWORD[SHUF_MASK]
	vpshufb	xmm3,xmm3,xmm5
	mov	r12,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r12]{k1},xmm3
$L$_partial_block_done_11:
	vmovdqu64	xmm2,XMMWORD[rdx]
	mov	r13,QWORD[112+rbp]
	sub	r13,r11
	je	NEAR $L$_enc_dec_done_10
	cmp	r13,256
	jbe	NEAR $L$_message_below_equal_16_blocks_10

	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vmovdqa64	zmm27,ZMMWORD[ddq_addbe_4444]
	vmovdqa64	zmm28,ZMMWORD[ddq_addbe_1234]






	vmovd	r15d,xmm2
	and	r15d,255

	vshufi64x2	zmm2,zmm2,zmm2,0
	vpshufb	zmm2,zmm2,zmm29



	cmp	r15b,240
	jae	NEAR $L$_next_16_overflow_12
	vpaddd	zmm7,zmm2,zmm28
	vpaddd	zmm10,zmm7,zmm27
	vpaddd	zmm11,zmm10,zmm27
	vpaddd	zmm12,zmm11,zmm27
	jmp	NEAR $L$_next_16_ok_12
$L$_next_16_overflow_12:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm12,ZMMWORD[ddq_add_4444]
	vpaddd	zmm7,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm10,zmm7,zmm12
	vpaddd	zmm11,zmm10,zmm12
	vpaddd	zmm12,zmm11,zmm12
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
; Processes the first 256 bytes (4 x 64-byte vectors) at [r9+r11]:
; runs zmm7/zmm10/zmm11/zmm12 through the round keys at [rcx+0 .. rcx+160],
; XORs the result with the input, stores it to the pointer loaded from
; [120+rbp] (+r11), and keeps a byte-permuted copy at [rsp+768 .. rsp+1023].
; If r14 == 0 it then copies four 64-byte vectors from [rdx+96 .. rdx+351]
; to [rsp+512 .. rsp+767].
$L$_next_16_ok_12:
	; zmm2 = highest 128-bit lane of zmm12 replicated to all lanes (base for
	; the next batch); the tracked low byte in r15b advances by 16
	vshufi64x2	zmm2,zmm12,zmm12,255
	add	r15b,16

	; Load 256 input bytes
	vmovdqu8	zmm0,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm3,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm4,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm5,ZMMWORD[192+r11*1+r9]


	; Round key 0 (broadcast to all four lanes) is XORed in; the nine keys at
	; +16..+144 go through vaesenc and the key at +160 through vaesenclast
	vbroadcastf64x2	zmm6,ZMMWORD[rcx]
	vpxorq	zmm7,zmm7,zmm6
	vpxorq	zmm10,zmm10,zmm6
	vpxorq	zmm11,zmm11,zmm6
	vpxorq	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[16+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[32+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[48+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[64+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[80+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[96+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[112+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[128+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[144+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[160+rcx]
	vaesenclast	zmm7,zmm7,zmm6
	vaesenclast	zmm10,zmm10,zmm6
	vaesenclast	zmm11,zmm11,zmm6
	vaesenclast	zmm12,zmm12,zmm6


	; XOR the cipher output with the loaded input
	vpxorq	zmm7,zmm7,zmm0
	vpxorq	zmm10,zmm10,zmm3
	vpxorq	zmm11,zmm11,zmm4
	vpxorq	zmm12,zmm12,zmm5


	; Write the 256 result bytes to the destination at the same offset r11
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm7
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm10
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm11
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm12

	; Keep a byte-permuted copy of the result on the stack; it is read back
	; by the vpclmulqdq code further down
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm7
	vmovdqa64	ZMMWORD[832+rsp],zmm10
	vmovdqa64	ZMMWORD[896+rsp],zmm11
	vmovdqa64	ZMMWORD[960+rsp],zmm12
	; r14 != 0 means the stack copy below has already been made
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_13

	; Copy 4 x 64 bytes from the context at rdx into the stack table
	; NOTE(review): presumably the precomputed hash-key powers - confirm
	; against the layout of the structure rdx points to
	vmovdqu64	zmm0,ZMMWORD[288+rdx]
	vmovdqu64	ZMMWORD[704+rsp],zmm0

	vmovdqu64	zmm3,ZMMWORD[224+rdx]
	vmovdqu64	ZMMWORD[640+rsp],zmm3


	; After the store, zmm3 keeps only its low lane, replicated four times
	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[160+rdx]
	vmovdqu64	ZMMWORD[576+rsp],zmm4

	vmovdqu64	zmm5,ZMMWORD[96+rdx]
	vmovdqu64	ZMMWORD[512+rsp],zmm5
; With fewer than 512 bytes in r13, branch to the "below 32 blocks" path.
; Otherwise prepare the next four vectors zmm7/zmm10/zmm11/zmm12 from zmm2,
; using the same fast/overflow split on r15b as the first batch.
$L$_skip_hkeys_precomputation_13:
	cmp	r13,512
	jb	NEAR $L$_message_below_32_blocks_10



	; Fast path (r15b < 240): add ddq_addbe_1234 (zmm28), then step by
	; ddq_addbe_4444 (zmm27)
	cmp	r15b,240
	jae	NEAR $L$_next_16_overflow_14
	vpaddd	zmm7,zmm2,zmm28
	vpaddd	zmm10,zmm7,zmm27
	vpaddd	zmm11,zmm10,zmm27
	vpaddd	zmm12,zmm11,zmm27
	jmp	NEAR $L$_next_16_ok_14
; Slow path for the second batch, taken when r15b >= 240: permute zmm2 with
; zmm29, add the ddq_add_1234 / ddq_add_4444 constants, and permute the
; four resulting vectors back with zmm29.
$L$_next_16_overflow_14:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm12,ZMMWORD[ddq_add_4444]
	vpaddd	zmm7,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm10,zmm7,zmm12
	vpaddd	zmm11,zmm10,zmm12
	vpaddd	zmm12,zmm11,zmm12
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
$L$_next_16_ok_14:
	vshufi64x2	zmm2,zmm12,zmm12,255
	add	r15b,16

	vmovdqu8	zmm0,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm3,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm4,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm5,ZMMWORD[448+r11*1+r9]


	vbroadcastf64x2	zmm6,ZMMWORD[rcx]
	vpxorq	zmm7,zmm7,zmm6
	vpxorq	zmm10,zmm10,zmm6
	vpxorq	zmm11,zmm11,zmm6
	vpxorq	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[16+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[32+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[48+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[64+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[80+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[96+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[112+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[128+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[144+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[160+rcx]
	vaesenclast	zmm7,zmm7,zmm6
	vaesenclast	zmm10,zmm10,zmm6
	vaesenclast	zmm11,zmm11,zmm6
	vaesenclast	zmm12,zmm12,zmm6


	vpxorq	zmm7,zmm7,zmm0
	vpxorq	zmm10,zmm10,zmm3
	vpxorq	zmm11,zmm11,zmm4
	vpxorq	zmm12,zmm12,zmm5


	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm7
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm10
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm11
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm12

	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
	vmovdqa64	ZMMWORD[1024+rsp],zmm7
	vmovdqa64	ZMMWORD[1088+rsp],zmm10
	vmovdqa64	ZMMWORD[1152+rsp],zmm11
	vmovdqa64	ZMMWORD[1216+rsp],zmm12
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_15
	vmovdqu64	zmm3,ZMMWORD[640+rsp]


	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[576+rsp]
	vmovdqu64	zmm5,ZMMWORD[512+rsp]

	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm4

	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm5

	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm4

	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm5

	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[192+rsp],zmm4

	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[128+rsp],zmm5

	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[64+rsp],zmm4

	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[rsp],zmm5
; Bookkeeping after the first 512 bytes: set r14 = 1 (the flag tested by the
; `test r14,r14` checks above), advance the buffer offset r11 by 512 and
; reduce the remaining count r13 by 512. With fewer than 768 bytes left the
; big-block loop is skipped.
$L$_skip_hkeys_precomputation_15:
	mov	r14,1
	add	r11,512
	sub	r13,512

	cmp	r13,768
	jb	NEAR $L$_no_more_big_nblocks_10
; Top of the main loop; each iteration consumes 768 bytes (r11 += 768,
; r13 -= 768 at the bottom of the loop). Builds the first four vectors
; zmm0/zmm3/zmm4/zmm5 from zmm2; the overflow path is used when r15b >= 240.
$L$_encrypt_big_nblocks_10:
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_16
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_16
; Slow path (r15b >= 240) for the first 256 bytes of a loop iteration:
; permute zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, then permute
; zmm0/zmm3/zmm4/zmm5 back with zmm29.
$L$_16_blocks_overflow_16:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_16:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rsp]




	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_17
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_17
; Slow path (r15b >= 240) for the second 256 bytes of a loop iteration:
; permute zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, then permute
; zmm0/zmm3/zmm4/zmm5 back with zmm29.
$L$_16_blocks_overflow_17:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_17:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[256+rsp]




	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[320+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[384+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[448+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[448+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vpternlogq	zmm24,zmm6,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm0
	vmovdqa64	ZMMWORD[832+rsp],zmm3
	vmovdqa64	ZMMWORD[896+rsp],zmm4
	vmovdqa64	ZMMWORD[960+rsp],zmm5
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_18
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_18
; Slow path (r15b >= 240) for the third 256 bytes of a loop iteration:
; permute zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, then permute
; zmm0/zmm3/zmm4/zmm5 back with zmm29.
$L$_16_blocks_overflow_18:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_18:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]




	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[512+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[576+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[640+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[704+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]


	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96

	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpternlogq	zmm6,zmm12,zmm15,0x96
	vpxorq	zmm6,zmm6,zmm24
	vpternlogq	zmm7,zmm13,zmm10,0x96
	vpxorq	zmm7,zmm7,zmm25
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vextracti64x4	ymm12,zmm6,1
	vpxorq	ymm6,ymm6,ymm12
	vextracti32x4	xmm12,ymm6,1
	vpxorq	xmm6,xmm6,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm6,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[512+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[576+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[640+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[704+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1024+rsp],zmm0
	vmovdqa64	ZMMWORD[1088+rsp],zmm3
	vmovdqa64	ZMMWORD[1152+rsp],zmm4
	vmovdqa64	ZMMWORD[1216+rsp],zmm5
	vmovdqa64	zmm14,zmm6

	add	r11,768
	sub	r13,768
	cmp	r13,768
	jae	NEAR $L$_encrypt_big_nblocks_10

; Fewer than 768 bytes remain in r13. Dispatch on what is left:
; >= 512 -> 32-block path, >= 256 -> 16-block path, otherwise fall through
; to the tail handler that follows.
$L$_no_more_big_nblocks_10:

	cmp	r13,512
	jae	NEAR $L$_encrypt_32_blocks_10

	cmp	r13,256
	jae	NEAR $L$_encrypt_16_blocks_10
; Tail handler for r13 < 256. Carry-less multiplies the four saved vectors at
; [rsp+768 .. rsp+1023] (the first one XORed with zmm14) by four table
; vectors at [rsp+rbx ..], accumulates the partial products unreduced in
; zmm24 / zmm25 / zmm26, then dispatches on the number of 16-byte blocks
; still to process (1..16, or 0).
$L$_encrypt_0_blocks_ghash_32_10:
	; ebx = 256 - (r13 rounded down to a multiple of 16): byte offset into the
	; table on the stack
	mov	r10d,r13d
	and	r10d,~15
	mov	ebx,256
	sub	ebx,r10d
	; First vector, with zmm14 folded in before the multiply
	vmovdqa64	zmm13,ZMMWORD[768+rsp]
	vpxorq	zmm13,zmm13,zmm14
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[832+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; zmm24 = XOR of the 0x11 products, zmm25 = XOR of the 0x00 products,
	; zmm26 = XOR of the 0x01 and 0x10 (cross) products
	vpxorq	zmm26,zmm4,zmm10
	vpxorq	zmm24,zmm0,zmm6
	vpxorq	zmm25,zmm3,zmm7
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[896+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[960+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	; vpternlogq with immediate 0x96 is a three-way XOR: fold the second pair
	; of products into the same three accumulators
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Advance the table offset by 256 for the per-count handlers below
	add	ebx,256
	; r10d = (r13 + 15) / 16 = number of blocks left, counting a partial one
	mov	r10d,r13d
	add	r10d,15
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_19

	; Compare tree on r10d, splitting at 8, 12 and 15
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_19
	jb	NEAR $L$_last_num_blocks_is_7_1_19


	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_19
	jb	NEAR $L$_last_num_blocks_is_11_9_19


	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_19
	ja	NEAR $L$_last_num_blocks_is_16_19
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_19
	jmp	NEAR $L$_last_num_blocks_is_13_19

; Second level of the block-count dispatch: r10d is 9, 10 or 11 here.
$L$_last_num_blocks_is_11_9_19:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_19
	ja	NEAR $L$_last_num_blocks_is_11_19
	jmp	NEAR $L$_last_num_blocks_is_9_19

; r10d is in 1..7 here: split at 4, then resolve 5 / 6 / 7.
$L$_last_num_blocks_is_7_1_19:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_19
	jb	NEAR $L$_last_num_blocks_is_3_1_19

	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_19
	je	NEAR $L$_last_num_blocks_is_6_19
	jmp	NEAR $L$_last_num_blocks_is_5_19

; r10d is in 1..3 here (0 was dispatched earlier); the value 1 falls through
; to the single-block handler that immediately follows.
$L$_last_num_blocks_is_3_1_19:

	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_19
	je	NEAR $L$_last_num_blocks_is_2_19
; Exactly one (possibly partial) block is left. k1 = 64-bit entry number r13
; of byte64_len_to_mask_table (8 bytes per entry), used below for the masked
; load and store. Then build the single vector xmm0 = xmm2 + low lane of
; zmm28 unless r15d >= 255, which selects the overflow path.
$L$_last_num_blocks_is_1_19:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_20
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_20

; Slow path for the single-block case (r15d >= 255): permute zmm2 with
; zmm29, add ddq_add_1234, and permute only the low 128 bits back, since
; only xmm0 is used afterwards.
$L$_16_blocks_overflow_20:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
; Single-block tail. Interleaves two independent streams:
;   * the cipher rounds on xmm0 with the round keys at [rcx+0 .. rcx+160]
;   * carry-less multiplies of the four saved vectors at
;     [rsp+1024 .. rsp+1279] by the table vectors at [rsp+rbx ..], whose
;     products are folded into the accumulators zmm24 / zmm25 / zmm26.
; The final block is loaded and stored under mask k1. If r13 >= 16 the new
; block is multiplied in and everything is reduced into xmm14 here;
; otherwise control goes to the partial-block handler.
$L$_16_blocks_ok_20:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 0 of zmm0 replicated to all four lanes
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Round key 0 is XORed in; zmm30/zmm31 alternate as round-key registers
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-way XOR: combine the products of the first three vectors
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked load of the last input block; bytes outside k1 are zeroed
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold this batch into the running accumulators zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vaesenclast	xmm0,xmm0,xmm30
	; XOR with the input, keep an unmasked copy in xmm11, store under k1
	vpxorq	xmm0,xmm0,xmm17
	vextracti32x4	xmm11,zmm0,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes of zmm0 outside k1, then byte-permute into xmm17;
	; xmm7 receives a copy of that permuted block
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	vextracti32x4	xmm7,zmm17,0


	; Fewer than 16 bytes in this block -> partial-block handler
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_21





	; Full block: clear the qword at [r8] and multiply the new block by the
	; 16 bytes at [rdx+336]
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the cross products: upper 8 bytes of each lane go to the high
	; accumulator (zmm0), lower 8 bytes to the low accumulator (zmm3)
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of each accumulator together
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduction using the POLY2 constant; the result is left in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_21
; Single-block tail, partial case (r13 < 16). Records r13 at [r8] and saves
; xmm11 at [rdx+16], then reduces the accumulators zmm24 / zmm25 / zmm26
; into xmm14 without multiplying the new block; the permuted partial block
; in xmm7 is simply XORed into xmm14 at the end.
; NOTE(review): xmm11 is presumably kept so a later call can finish this
; block - confirm against the context layout.
$L$_small_initial_partial_block_21:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


	; Split the cross products in zmm26: upper 8 bytes of each lane go into
	; zmm24, lower 8 bytes into zmm25
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	; XOR the four 128-bit lanes of each accumulator together
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Reduction using the POLY2 constant; the result is left in xmm14
	vmovdqa64	xmm0,XMMWORD[POLY2]


	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












	; Fold the not-yet-multiplied partial block into the running value
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_21
; Both single-block paths (full and partial) join here and leave the tail
; dispatch.
$L$_small_initial_compute_done_21:
$L$_after_reduction_21:
	jmp	NEAR $L$_last_blocks_done_19
; Two blocks left (the second may be partial). k1 = 64-bit entry number r13
; of byte64_len_to_mask_table, used below for the masked load and store.
; ymm0 = ymm2 + low 256 bits of zmm28 unless r15d >= 254, which selects the
; overflow path.
$L$_last_num_blocks_is_2_19:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_22
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_22

; Slow path for the two-block case (r15d >= 254): permute zmm2 with zmm29,
; add ddq_add_1234, and permute only the low 256 bits back, since only ymm0
; is used afterwards.
$L$_16_blocks_overflow_22:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_22:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vaesenclast	ymm0,ymm0,ymm30
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm11,zmm0,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	vextracti32x4	xmm7,zmm17,1
	sub	r13,16 * (2 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_23





	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_23
$L$_small_initial_partial_block_23:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_23:

	or	r13,r13
	je	NEAR $L$_after_reduction_23
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_23:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_3_19:
; Tail handler for the case where three blocks are left (one ZMM register,
; lanes 0..2).  It uses eleven round keys at [rcx]..[160+rcx]: one XOR, nine
; vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = 64-bit byte mask read from byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; r15d >= 253 (256 - 3, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_24
; Fast path: zmm0 = zmm2 + zmm28 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_24

$L$_16_blocks_overflow_24:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_24:




; From here the AES rounds on zmm0 are interleaved with carry-less multiplies
; of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).  zmm30 and
; zmm31 alternately hold the next round key, broadcast to all 128-bit lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 2 of zmm0 (the third counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the input bytes from [r9+r11].
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
; Output = AES result XOR input; xmm11 keeps lane 2 (the last output block).
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,2
; r10 = pointer stored at [120+rbp]; the masked store writes to [r10+r11].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
; Clear the bytes not selected by k1, then byte-shuffle the output with zmm29
; before it is multiplied below.  xmm7 = lane 2 of the shuffled output.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,2
; r13 -= 32: what is left in r13 belongs to the third block.
	sub	r13,16 * (3 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_25





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All three
; blocks are multiplied by the 128-bit values at [304+rdx], [320+rdx] and
; [336+rdx] (presumably hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
; Add the running sums: zmm26 (cross terms), zmm24 (high), zmm25 (low).
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

; Split the combined cross terms across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_25
$L$_small_initial_partial_block_25:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first two blocks (YMM width, values at
; [320+rdx] and [336+rdx]).  The fold and reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_25:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_25
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_25:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_4_19:
; Tail handler for the case where four blocks are left (one full ZMM
; register).  It uses eleven round keys at [rcx]..[160+rcx]: one XOR, nine
; vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = 64-bit byte mask read from byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; r15d >= 252 (256 - 4, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_26
; Fast path: zmm0 = zmm2 + zmm28 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_26

$L$_16_blocks_overflow_26:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_26:




; From here the AES rounds on zmm0 are interleaved with carry-less multiplies
; of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).  zmm30 and
; zmm31 alternately hold the next round key, broadcast to all 128-bit lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 3 of zmm0 (the fourth counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the input bytes from [r9+r11].
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
; Output = AES result XOR input; xmm11 keeps lane 3 (the last output block).
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,3
; r10 = pointer stored at [120+rbp]; the masked store writes to [r10+r11].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
; Clear the bytes not selected by k1, then byte-shuffle the output with zmm29
; before it is multiplied below.  xmm7 = lane 3 of the shuffled output.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,3
; r13 -= 48: what is left in r13 belongs to the fourth block.
	sub	r13,16 * (4 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_27





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All four
; blocks are multiplied by the 64 bytes at [288+rdx] (presumably four
; hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

; Add the running sums: zmm26 (cross terms), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Split the combined cross terms across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_27
$L$_small_initial_partial_block_27:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first three blocks (values at
; [304+rdx], [320+rdx] and [336+rdx]; lane 3 of zmm1 is zero after the
; YMM load).  The fold and reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_27:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_27
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_27:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_5_19:
; Tail handler for the case where five blocks are left: four in zmm0 plus
; one in xmm3.  It uses eleven round keys at [rcx]..[160+rcx]: one XOR, nine
; vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = byte mask for the second vector, read from
; byte64_len_to_mask_table[r13 - 64]; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; r15d >= 251 (256 - 5, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_28
; Fast path: zmm0 = zmm2 + zmm28, xmm3 = xmm0 + xmm27 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_28

$L$_16_blocks_overflow_28:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234 (and ddq_add_4444
; for the second vector), then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_28:




; From here the AES rounds on zmm0/xmm3 are interleaved with carry-less
; multiplies of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).
; zmm30 and zmm31 alternately hold the next round key, broadcast to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 0 of zmm3 (the fifth counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input: 64 unmasked bytes from [r9+r11], then a masked, zeroing load of the
; fifth block from [r9+r11+64].
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
; Output = AES result XOR input; xmm11 keeps the last output block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	vextracti32x4	xmm11,zmm3,0
; r10 = pointer stored at [120+rbp]; 64 bytes are stored unmasked at
; [r10+r11] and the fifth block is stored under k1 at [r10+r11+64].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
; Clear the bytes of zmm3 not selected by k1, then byte-shuffle both outputs
; with zmm29 before they are multiplied below.  xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	vextracti32x4	xmm7,zmm19,0
; r13 -= 64: what is left in r13 belongs to the fifth block.
	sub	r13,16 * (5 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_29





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All five
; blocks are multiplied: zmm17 by the 64 bytes at [272+rdx] and xmm19 by the
; 16 bytes at [336+rdx] (presumably hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

; Merge both product sets with the running sums: zmm26 (cross terms),
; zmm24 (high), zmm25 (low).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the combined cross terms across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_29
$L$_small_initial_partial_block_29:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first four blocks (zmm17 by the 64
; bytes at [288+rdx]).  The fold and reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_29:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_29
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_29:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_6_19:
; Tail handler for the case where six blocks are left: four in zmm0 plus two
; in ymm3.  It uses eleven round keys at [rcx]..[160+rcx]: one XOR, nine
; vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = byte mask for the second vector, read from
; byte64_len_to_mask_table[r13 - 64]; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; r15d >= 250 (256 - 6, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_30
; Fast path: zmm0 = zmm2 + zmm28, ymm3 = ymm0 + ymm27 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_30

$L$_16_blocks_overflow_30:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234 (and ddq_add_4444
; for the second vector), then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_30:




; From here the AES rounds on zmm0/ymm3 are interleaved with carry-less
; multiplies of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).
; zmm30 and zmm31 alternately hold the next round key, broadcast to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 1 of zmm3 (the sixth counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input: 64 unmasked bytes from [r9+r11], then a masked, zeroing load of the
; remaining bytes from [r9+r11+64].
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
; Output = AES result XOR input; xmm11 keeps the last output block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm11,zmm3,1
; r10 = pointer stored at [120+rbp]; 64 bytes are stored unmasked at
; [r10+r11] and the rest is stored under k1 at [r10+r11+64].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
; Clear the bytes of zmm3 not selected by k1, then byte-shuffle both outputs
; with zmm29 before they are multiplied below.  xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	vextracti32x4	xmm7,zmm19,1
; r13 -= 80: what is left in r13 belongs to the sixth block.
	sub	r13,16 * (6 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_31





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All six
; blocks are multiplied: zmm17 by the 64 bytes at [256+rdx] and ymm19 by the
; 32 bytes at [320+rdx] (presumably hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

; Merge both product sets with the running sums: zmm26 (cross terms),
; zmm24 (high), zmm25 (low).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the combined cross terms across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_31
$L$_small_initial_partial_block_31:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first five blocks (zmm17 by the 64
; bytes at [272+rdx], xmm19 by the 16 bytes at [336+rdx]).  The fold and
; reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_31:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_31
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_31:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_7_19:
; Tail handler for the case where seven blocks are left: four in zmm0 plus
; three in zmm3 (lanes 0..2).  It uses eleven round keys at [rcx]..[160+rcx]:
; one XOR, nine vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = byte mask for the second vector, read from
; byte64_len_to_mask_table[r13 - 64]; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; r15d >= 249 (256 - 7, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_32
; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_32

$L$_16_blocks_overflow_32:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234 (and ddq_add_4444
; for the second vector), then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_32:




; From here the AES rounds on zmm0/zmm3 are interleaved with carry-less
; multiplies of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).
; zmm30 and zmm31 alternately hold the next round key, broadcast to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 2 of zmm3 (the seventh counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input: 64 unmasked bytes from [r9+r11], then a masked, zeroing load of the
; remaining bytes from [r9+r11+64].
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
; Output = AES result XOR input; xmm11 keeps the last output block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti32x4	xmm11,zmm3,2
; r10 = pointer stored at [120+rbp]; 64 bytes are stored unmasked at
; [r10+r11] and the rest is stored under k1 at [r10+r11+64].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Clear the bytes of zmm3 not selected by k1, then byte-shuffle both outputs
; with zmm29 before they are multiplied below.  xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,2
; r13 -= 96: what is left in r13 belongs to the seventh block.
	sub	r13,16 * (7 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_33





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All seven
; blocks are multiplied: zmm17 by the 64 bytes at [240+rdx] and zmm19 by the
; three 16-byte values at [304+rdx], [320+rdx] and [336+rdx] (presumably
; hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

; Merge both product sets with the running sums: zmm26 (cross terms),
; zmm24 (high), zmm25 (low).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the combined cross terms across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_33
$L$_small_initial_partial_block_33:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first six blocks (zmm17 by the 64
; bytes at [256+rdx], ymm19 by the 32 bytes at [320+rdx]).  The fold and
; reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_33:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_33
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_33:
	jmp	NEAR $L$_last_blocks_done_19
$L$_last_num_blocks_is_8_19:
; Tail handler for the case where eight blocks are left: four in zmm0 plus
; four in zmm3.  It uses eleven round keys at [rcx]..[160+rcx]: one XOR, nine
; vaesenc and one vaesenclast.
; NOTE(review): r13 looks like the number of bytes still to process (it
; indexes a length-to-mask table and is reduced in 16-byte steps) - confirm.
; k1 = byte mask for the second vector, read from
; byte64_len_to_mask_table[r13 - 64]; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; r15d >= 248 (256 - 8, unsigned) selects the slower counter update.
; NOTE(review): presumably r15d tracks the low counter byte and this avoids
; a carry out of it - confirm.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_34
; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27 (dword-wise).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_34

$L$_16_blocks_overflow_34:
; Slow path: byte-shuffle zmm2 with zmm29, add ddq_add_1234 (and ddq_add_4444
; for the second vector), then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_34:




; From here the AES rounds on zmm0/zmm3 are interleaved with carry-less
; multiplies of data held in the stack frame ([1024+rsp].. and [rsp+rbx]..).
; zmm30 and zmm31 alternately hold the next round key, broadcast to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; zmm2 = lane 3 of zmm3 (the eighth counter block) replicated to every lane.
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Four partial products per operand pair: 0x11 high*high, 0x00 low*low,
; 0x01 and 0x10 the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; imm8 0x96 is a three-way XOR: merge the partial products computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input: 64 unmasked bytes from [r9+r11], then a masked, zeroing load of the
; remaining bytes from [r9+r11+64].
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (high), zmm25 (low) and
; zmm26 (cross terms).  They are updated in place, so they carry values in.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
; Output = AES result XOR input; xmm11 keeps the last output block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti32x4	xmm11,zmm3,3
; r10 = pointer stored at [120+rbp]; 64 bytes are stored unmasked at
; [r10+r11] and the rest is stored under k1 at [r10+r11+64].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Clear the bytes of zmm3 not selected by k1, then byte-shuffle both outputs
; with zmm29 before they are multiplied below.  xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,3
; r13 -= 112: what is left in r13 belongs to the eighth block.
	sub	r13,16 * (8 - 1)


; Fewer than 16 left -> the last block is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_35





; Last block complete: r13 becomes 0 and 0 is stored at [r8].  All eight
; blocks are multiplied: zmm17 by the 64 bytes at [224+rdx] and zmm19 by the
; 64 bytes at [288+rdx] (presumably hash-key powers - confirm).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; Pair up the matching partial products of the two vectors.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

; Add the running sums: zmm26 (cross terms), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Split the combined cross terms across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
; XOR the four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction with the constant POLY2; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_35
$L$_small_initial_partial_block_35:








; Last block incomplete: store r13 at [r8] and the last output block (xmm11)
; at [16+rdx], then multiply only the first seven blocks (zmm17 by the 64
; bytes at [240+rdx], zmm19 by the values at [304+rdx], [320+rdx] and
; [336+rdx]; lane 3 of zmm1 is zero after the YMM load).  The fold and
; reduction mirror the path above.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_35:

; r13 != 0 means the last block was left out of the multiply above; XOR its
; shuffled form (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_35
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_35:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail case for 9 blocks (per the label name): two full 64-byte vectors
; plus one 16-byte lane that is loaded and stored under byte mask k1.
; Roles below are read off this block; "presumably" marks an inference
; that should be confirmed against the enclosing function.
;   r13          length in bytes still to handle (presumably); reduced here
;                to the length of the last block
;   r9+r11       bytes read, [120+rbp]+r11 bytes written
;   rcx          round keys, 16 bytes apart, offsets 0..160 used
;   rdx          table of multipliers at 208..336, saved block at 16
;   r8           receives the length of a trailing partial block (0 if none)
;   zmm2         counter input; left holding the last counter block in all lanes
;   zmm24/25/26  running sums of high / low / cross carry-less products
;   xmm14        reduced 128-bit result
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_9_19:
; k1 = 64-bit entry number (r13 - 128) of the mask table: the byte mask
; for the data that follows the first two 64-byte vectors.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 247 = 256 - 9. Presumably r15d mirrors the low counter byte, so a value
; >= 247 means adding 9 would carry out of that byte -- TODO confirm.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_36
; Fast path: add the increments held in zmm28 / zmm27 directly, with no
; byte shuffling (presumably these are pre-swapped forms of the constants
; used on the slow path -- confirm).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_36

$L$_16_blocks_overflow_36:
; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
; ddq_add_4444 for each following vector, and shuffle the results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_36:
; From here the AES rounds on zmm0/zmm3/xmm4 are interleaved with
; carry-less multiplies of four vector pairs taken from the stack
; ([rsp+rbx+0/64/128/192] against [rsp+1024/1088/1152/1216]); presumably
; this hashes blocks buffered earlier -- confirm. Selector 0x11 multiplies
; the high qwords, 0x00 the low qwords, 0x01/0x10 the cross terms.
; vpternlogq with immediate 0x96 is a three-input XOR.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 0 of the third vector is the 9th (last) counter block; copy it to
; every lane of zmm2.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Initial key addition (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of the first stack pair.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
; AES round 1 (key at rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
; AES round 2 (key at rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
; AES round 3 (key at rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; XOR together the products of the first three pairs, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
; AES round 4 (key at rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: two full vectors and one lane under k1 (masked-off bytes
; read as zero). zmm17/zmm19/zmm20 are free again after the XORs above.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
; AES round 5 (key at rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
; AES round 6 (key at rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (0x11 terms),
; zmm25 (0x00 terms), zmm26 (all cross terms).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
; AES rounds 7, 8, 9 (keys at rcx+112, +128, +144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
; Final round (key at rcx+160): nine vaesenc plus vaesenclast, the same
; schedule as the aes_128 path of ossl_aes_gcm_init_avx512.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
; XOR the AES output with the input data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
; Keep the last result block as computed (before the masking below);
; the partial-block path stores it at [16+rdx].
	vextracti32x4	xmm11,zmm4,0
; Write the results; the last lane is stored under k1 only.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
; Zero the bytes beyond the mask, then shuffle the result blocks with
; zmm29 (presumably a byte-order swap for the multiply -- confirm).
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
; xmm7 = the last shuffled block.
	vextracti32x4	xmm7,zmm20,0
; r13 = number of bytes that belong to the 9th block.
	sub	r13,16 * (9 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_37





; The last block is complete: record a zero partial length at [r8] and
; multiply all 9 blocks by the table entries at rdx+208 .. rdx+336.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) halves,
; then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Reduce xmm0:xmm3 to 128 bits with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_37
$L$_small_initial_partial_block_37:
; The last block is shorter than 16 bytes: record its length at [r8],
; save the last result block at [16+rdx], and multiply only the 8 complete
; blocks (table entries at rdx+224 .. rdx+336). The partial block in xmm7
; is XORed into the result after the reduction.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

; Add the running sums zmm26/zmm24/zmm25.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Same lane folding and POLY2 reduction as on the full-block path.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_37:

; r13 is non-zero only when a partial block is pending; in that case fold
; its (zero-padded, shuffled) bytes into xmm14 without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_37
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_37:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail case for 10 blocks (per the label name): two full 64-byte vectors
; plus a 32-byte (two-lane) vector loaded and stored under byte mask k1.
; Roles below are read off this block; "presumably" marks an inference
; that should be confirmed against the enclosing function.
;   r13          length in bytes still to handle (presumably); reduced here
;                to the length of the last block
;   r9+r11       bytes read, [120+rbp]+r11 bytes written
;   rcx          round keys, 16 bytes apart, offsets 0..160 used
;   rdx          table of multipliers at 192..336, saved block at 16
;   r8           receives the length of a trailing partial block (0 if none)
;   zmm2         counter input; left holding the last counter block in all lanes
;   zmm24/25/26  running sums of high / low / cross carry-less products
;   xmm14        reduced 128-bit result
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_10_19:
; k1 = 64-bit entry number (r13 - 128) of the mask table: the byte mask
; for the data that follows the first two 64-byte vectors.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 246 = 256 - 10. Presumably r15d mirrors the low counter byte, so a value
; >= 246 means adding 10 would carry out of that byte -- TODO confirm.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_38
; Fast path: add the increments held in zmm28 / zmm27 directly, with no
; byte shuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_38

$L$_16_blocks_overflow_38:
; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
; ddq_add_4444 for each following vector, and shuffle the results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_38:
; From here the AES rounds on zmm0/zmm3/ymm4 are interleaved with
; carry-less multiplies of four vector pairs taken from the stack
; ([rsp+rbx+0/64/128/192] against [rsp+1024/1088/1152/1216]); presumably
; this hashes blocks buffered earlier -- confirm. Selector 0x11 multiplies
; the high qwords, 0x00 the low qwords, 0x01/0x10 the cross terms.
; vpternlogq with immediate 0x96 is a three-input XOR.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 1 of the third vector is the 10th (last) counter block; copy it to
; every lane of zmm2.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Initial key addition (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of the first stack pair.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
; AES round 1 (key at rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
; AES round 2 (key at rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
; AES round 3 (key at rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; XOR together the products of the first three pairs, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
; AES round 4 (key at rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: two full vectors and two lanes under k1 (masked-off
; bytes read as zero). zmm17/zmm19/zmm20 are free again after the XORs above.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
; AES round 5 (key at rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
; AES round 6 (key at rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (0x11 terms),
; zmm25 (0x00 terms), zmm26 (all cross terms).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
; AES rounds 7, 8, 9 (keys at rcx+112, +128, +144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
; Final round (key at rcx+160): nine vaesenc plus vaesenclast, the same
; schedule as the aes_128 path of ossl_aes_gcm_init_avx512.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
; XOR the AES output with the input data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
; Keep the last result block as computed (before the masking below);
; the partial-block path stores it at [16+rdx].
	vextracti32x4	xmm11,zmm4,1
; Write the results; the last vector is stored under k1 only.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
; Zero the bytes beyond the mask, then shuffle the result blocks with
; zmm29 (presumably a byte-order swap for the multiply -- confirm).
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
; xmm7 = the last shuffled block.
	vextracti32x4	xmm7,zmm20,1
; r13 = number of bytes that belong to the 10th block.
	sub	r13,16 * (10 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_39





; The last block is complete: record a zero partial length at [r8] and
; multiply all 10 blocks by the table entries at rdx+192 .. rdx+336.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) halves,
; then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Reduce xmm0:xmm3 to 128 bits with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_39
$L$_small_initial_partial_block_39:
; The last block is shorter than 16 bytes: record its length at [r8],
; save the last result block at [16+rdx], and multiply only the 9 complete
; blocks (table entries at rdx+208 .. rdx+336). The partial block in xmm7
; is XORed into the result after the reduction.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Only lane 0 of the third vector (the 9th block) is multiplied here.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Same lane folding and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_39:

; r13 is non-zero only when a partial block is pending; in that case fold
; its (zero-padded, shuffled) bytes into xmm14 without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_39
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_39:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail case for 11 blocks (per the label name): two full 64-byte vectors
; plus a third vector (three lanes in use) loaded and stored under byte
; mask k1.
; Roles below are read off this block; "presumably" marks an inference
; that should be confirmed against the enclosing function.
;   r13          length in bytes still to handle (presumably); reduced here
;                to the length of the last block
;   r9+r11       bytes read, [120+rbp]+r11 bytes written
;   rcx          round keys, 16 bytes apart, offsets 0..160 used
;   rdx          table of multipliers at 176..336, saved block at 16
;   r8           receives the length of a trailing partial block (0 if none)
;   zmm2         counter input; left holding the last counter block in all lanes
;   zmm24/25/26  running sums of high / low / cross carry-less products
;   xmm14        reduced 128-bit result
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_11_19:
; k1 = 64-bit entry number (r13 - 128) of the mask table: the byte mask
; for the data that follows the first two 64-byte vectors.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 245 = 256 - 11. Presumably r15d mirrors the low counter byte, so a value
; >= 245 means adding 11 would carry out of that byte -- TODO confirm.
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_40
; Fast path: add the increments held in zmm28 / zmm27 directly, with no
; byte shuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_40

$L$_16_blocks_overflow_40:
; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
; ddq_add_4444 for each following vector, and shuffle the results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_40:
; From here the AES rounds on zmm0/zmm3/zmm4 are interleaved with
; carry-less multiplies of four vector pairs taken from the stack
; ([rsp+rbx+0/64/128/192] against [rsp+1024/1088/1152/1216]); presumably
; this hashes blocks buffered earlier -- confirm. Selector 0x11 multiplies
; the high qwords, 0x00 the low qwords, 0x01/0x10 the cross terms.
; vpternlogq with immediate 0x96 is a three-input XOR.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 2 of the third vector is the 11th (last) counter block; copy it to
; every lane of zmm2.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Initial key addition (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of the first stack pair.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
; AES round 1 (key at rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
; AES round 2 (key at rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
; AES round 3 (key at rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; XOR together the products of the first three pairs, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
; AES round 4 (key at rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: two full vectors and a third under k1 (masked-off bytes
; read as zero). zmm17/zmm19/zmm20 are free again after the XORs above.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
; AES round 5 (key at rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
; AES round 6 (key at rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (0x11 terms),
; zmm25 (0x00 terms), zmm26 (all cross terms).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
; AES rounds 7, 8, 9 (keys at rcx+112, +128, +144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
; Final round (key at rcx+160): nine vaesenc plus vaesenclast, the same
; schedule as the aes_128 path of ossl_aes_gcm_init_avx512.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
; XOR the AES output with the input data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
; Keep the last result block as computed (before the masking below);
; the partial-block path stores it at [16+rdx].
	vextracti32x4	xmm11,zmm4,2
; Write the results; the last vector is stored under k1 only.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
; Zero the bytes beyond the mask, then shuffle the result blocks with
; zmm29 (presumably a byte-order swap for the multiply -- confirm).
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
; xmm7 = the last shuffled block.
	vextracti32x4	xmm7,zmm20,2
; r13 = number of bytes that belong to the 11th block.
	sub	r13,16 * (11 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_41





; The last block is complete: record a zero partial length at [r8] and
; multiply all 11 blocks by the table entries at rdx+176 .. rdx+336.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Build a three-entry multiplier vector: the entries at rdx+304 and rdx+320
; in lanes 0-1, the entry at rdx+336 in lane 2.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) halves,
; then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Reduce xmm0:xmm3 to 128 bits with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_41
$L$_small_initial_partial_block_41:
; The last block is shorter than 16 bytes: record its length at [r8],
; save the last result block at [16+rdx], and multiply only the 10 complete
; blocks (table entries at rdx+192 .. rdx+336). The partial block in xmm7
; is XORed into the result after the reduction.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Only lanes 0-1 of the third vector (blocks 9 and 10) are multiplied here.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Same lane folding and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_41:

; r13 is non-zero only when a partial block is pending; in that case fold
; its (zero-padded, shuffled) bytes into xmm14 without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_41
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_41:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail case for 12 blocks (per the label name): two full 64-byte vectors
; plus a third vector (all four lanes in use) loaded and stored under byte
; mask k1.
; Roles below are read off this block; "presumably" marks an inference
; that should be confirmed against the enclosing function.
;   r13          length in bytes still to handle (presumably); reduced here
;                to the length of the last block
;   r9+r11       bytes read, [120+rbp]+r11 bytes written
;   rcx          round keys, 16 bytes apart, offsets 0..160 used
;   rdx          table of multipliers at 160..336, saved block at 16
;   r8           receives the length of a trailing partial block (0 if none)
;   zmm2         counter input; left holding the last counter block in all lanes
;   zmm24/25/26  running sums of high / low / cross carry-less products
;   xmm14        reduced 128-bit result
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_12_19:
; k1 = 64-bit entry number (r13 - 128) of the mask table: the byte mask
; for the data that follows the first two 64-byte vectors.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 244 = 256 - 12. Presumably r15d mirrors the low counter byte, so a value
; >= 244 means adding 12 would carry out of that byte -- TODO confirm.
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_42
; Fast path: add the increments held in zmm28 / zmm27 directly, with no
; byte shuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_42

$L$_16_blocks_overflow_42:
; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
; ddq_add_4444 for each following vector, and shuffle the results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_42:
; From here the AES rounds on zmm0/zmm3/zmm4 are interleaved with
; carry-less multiplies of four vector pairs taken from the stack
; ([rsp+rbx+0/64/128/192] against [rsp+1024/1088/1152/1216]); presumably
; this hashes blocks buffered earlier -- confirm. Selector 0x11 multiplies
; the high qwords, 0x00 the low qwords, 0x01/0x10 the cross terms.
; vpternlogq with immediate 0x96 is a three-input XOR.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 3 of the third vector is the 12th (last) counter block; copy it to
; every lane of zmm2.
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Initial key addition (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of the first stack pair.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
; AES round 1 (key at rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
; AES round 2 (key at rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
; AES round 3 (key at rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; XOR together the products of the first three pairs, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
; AES round 4 (key at rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: two full vectors and a third under k1 (masked-off bytes
; read as zero). zmm17/zmm19/zmm20 are free again after the XORs above.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
; AES round 5 (key at rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
; AES round 6 (key at rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (0x11 terms),
; zmm25 (0x00 terms), zmm26 (all cross terms).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
; AES rounds 7, 8, 9 (keys at rcx+112, +128, +144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
; Final round (key at rcx+160): nine vaesenc plus vaesenclast, the same
; schedule as the aes_128 path of ossl_aes_gcm_init_avx512.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
; XOR the AES output with the input data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
; Keep the last result block as computed (before the masking below);
; the partial-block path stores it at [16+rdx].
	vextracti32x4	xmm11,zmm4,3
; Write the results; the last vector is stored under k1 only.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
; Zero the bytes beyond the mask, then shuffle the result blocks with
; zmm29 (presumably a byte-order swap for the multiply -- confirm).
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
; xmm7 = the last shuffled block.
	vextracti32x4	xmm7,zmm20,3
; r13 = number of bytes that belong to the 12th block.
	sub	r13,16 * (12 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_43





; The last block is complete: record a zero partial length at [r8] and
; multiply all 12 blocks by the table entries at rdx+160 .. rdx+336.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; zmm17 / zmm19 have been consumed above and now serve as temporaries
; for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

; Add the running sums zmm26/zmm24/zmm25.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Split the cross terms across the high (zmm0) and low (zmm3) halves,
; then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Reduce xmm0:xmm3 to 128 bits with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_43
$L$_small_initial_partial_block_43:
; The last block is shorter than 16 bytes: record its length at [r8],
; save the last result block at [16+rdx], and multiply only the 11 complete
; blocks (table entries at rdx+176 .. rdx+336). The partial block in xmm7
; is XORed into the result after the reduction.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Build a three-entry multiplier vector (entries at rdx+304, +320, +336);
; lane 3 is left zero, so the partial block in lane 3 of zmm20 contributes
; nothing to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Same lane folding and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_43:

; r13 is non-zero only when a partial block is pending; in that case fold
; its (zero-padded, shuffled) bytes into xmm14 without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_43
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_43:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail case for 13 blocks (per the label name): three full 64-byte vectors
; plus one 16-byte lane that is loaded and stored under byte mask k1.
; Roles below are read off this block; "presumably" marks an inference
; that should be confirmed against the enclosing function.
;   r13          length in bytes still to handle (presumably); reduced here
;                to the length of the last block
;   r9+r11       bytes read, [120+rbp]+r11 bytes written
;   rcx          round keys, 16 bytes apart, offsets 0..160 used
;   rdx          table of multipliers at 144..336, saved block at 16
;   r8           receives the length of a trailing partial block (0 if none)
;   zmm2         counter input; left holding the last counter block in all lanes
;   zmm24/25/26  running sums of high / low / cross carry-less products
;   xmm14        reduced 128-bit result
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_13_19:
; k1 = 64-bit entry number (r13 - 192) of the mask table: the byte mask
; for the data that follows the first three 64-byte vectors.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
; 243 = 256 - 13. Presumably r15d mirrors the low counter byte, so a value
; >= 243 means adding 13 would carry out of that byte -- TODO confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_44
; Fast path: add the increments held in zmm28 / zmm27 directly, with no
; byte shuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_44

$L$_16_blocks_overflow_44:
; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
; ddq_add_4444 for each following vector, and shuffle the results back.
; zmm5 first holds the ddq_add_4444 constant and is then overwritten with
; the fourth counter vector.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
$L$_16_blocks_ok_44:
; From here the AES rounds on zmm0/zmm3/zmm4/xmm5 are interleaved with
; carry-less multiplies of four vector pairs taken from the stack
; ([rsp+rbx+0/64/128/192] against [rsp+1024/1088/1152/1216]); presumably
; this hashes blocks buffered earlier -- confirm. Selector 0x11 multiplies
; the high qwords, 0x00 the low qwords, 0x01/0x10 the cross terms.
; vpternlogq with immediate 0x96 is a three-input XOR.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 0 of the fourth vector is the 13th (last) counter block; copy it to
; every lane of zmm2.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
; Initial key addition (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of the first stack pair.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
; AES round 1 (key at rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
; AES round 2 (key at rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
; AES round 3 (key at rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; XOR together the products of the first three pairs, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
; AES round 4 (key at rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: three full vectors and one lane under k1 (masked-off
; bytes read as zero). zmm17/zmm19/zmm20/zmm21 are free again after the
; XORs above.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
; AES round 5 (key at rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
; AES round 6 (key at rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold everything into the running sums: zmm24 (0x11 terms),
; zmm25 (0x00 terms), zmm26 (all cross terms).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
; AES rounds 7, 8, 9 (keys at rcx+112, +128, +144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
; Final round (key at rcx+160): nine vaesenc plus vaesenclast, the same
; schedule as the aes_128 path of ossl_aes_gcm_init_avx512.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
; XOR the AES output with the input data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
; Keep the last result block as computed (before the masking below);
; the partial-block path stores it at [16+rdx].
	vextracti32x4	xmm11,zmm5,0
; Write the results; the last lane is stored under k1 only.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
; Zero the bytes beyond the mask, then shuffle the result blocks with
; zmm29 (presumably a byte-order swap for the multiply -- confirm).
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
; xmm7 = the last shuffled block.
	vextracti32x4	xmm7,zmm21,0
; r13 = number of bytes that belong to the 13th block.
	sub	r13,16 * (13 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_45





; The last block is complete: record a zero partial length at [r8] and
; multiply all 13 blocks by the table entries at rdx+144 .. rdx+336.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; zmm17 / zmm19 have been consumed above and now serve as temporaries
; for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
; The 13th block against the table entry at rdx+336.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

; Add the running sums zmm24/zmm25/zmm26 to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) halves,
; then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Reduce xmm0:xmm3 to 128 bits with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_45
$L$_small_initial_partial_block_45:
; The last block is shorter than 16 bytes: record its length at [r8],
; save the last result block at [16+rdx], and multiply only the 12 complete
; blocks (table entries at rdx+160 .. rdx+336). The partial block in xmm7
; is XORed into the result after the reduction.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; zmm17 / zmm19 have been consumed above and now serve as temporaries
; for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

; Add the running sums zmm26/zmm24/zmm25.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Same lane folding and POLY2 reduction as on the full-block path.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_45:

; r13 is non-zero only when a partial block is pending; in that case fold
; its (zero-padded, shuffled) bytes into xmm14 without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_45
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_45:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail handler: 14 blocks remain (3 x 64 bytes + up to 32 masked bytes).
; Builds 14 counter blocks in zmm0/zmm3/zmm4/ymm5, runs them through ten
; AES rounds with the round keys at [rcx+0 .. rcx+160], XORs the result
; with the input at [r9+r11] and stores it at [r10+r11] (r10 loaded from
; [rbp+120]).  Interleaved with the AES rounds, carry-less products of
; [rsp+1024..1216] with [rsp+rbx..rsp+rbx+192] are folded into
; zmm24 (hi*hi), zmm25 (lo*lo) and zmm26 (cross terms).
; Afterwards the byte-shuffled output blocks are multiplied by the table
; at [rdx+...], everything is reduced with POLY2, and the result is left
; in xmm14.
; NOTE(review): [rdx+112..336] presumably holds precomputed hash-key
; powers and the [rsp+...] areas earlier blocks/key powers - they are set
; up outside this block; confirm against the caller.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_14_19:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192				; bytes beyond the three full 64-byte chunks
	kmovq	k1,[rax*8+r10]			; k1 = byte64_len_to_mask_table[r13 - 192]
	cmp	r15d,242			; 242 = 256 - 14; NOTE(review): r15d looks like the low counter byte - confirm
	jae	NEAR $L$_16_blocks_overflow_46
	; Fast path: plain dword adds on zmm2 with the increments in zmm28/zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_46

$L$_16_blocks_overflow_46:
	; Slow path: shuffle with zmm29, add the ddq_add_* constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
$L$_16_blocks_ok_46:



; Round key 0 is broadcast to all lanes; zmm30/zmm31 alternate as key holders.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,1		; lane 1 of zmm5 = 14th counter block
	vshufi64x2	zmm2,zmm2,zmm2,0	; replicate it into every 128-bit lane of zmm2


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	vpxorq	zmm0,zmm0,zmm30			; initial key XOR (key at rcx+0)
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Products of [rsp+1024] with [rsp+rbx]: 0x11 hi*hi, 0x00 lo*lo, 0x01/0x10 cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31			; AES round with key at rcx+16
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Products of [rsp+1088] with [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+32
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Products of [rsp+1152] with [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+48
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; 0x96 = three-way XOR: merge the three product sets computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+64
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the 14 input blocks; the last 32-byte chunk is byte-masked by k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+80
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Products of [rsp+1216] with [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+96
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators zmm24 (hi), zmm25 (lo), zmm26 (mid).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+112
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+128
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+144
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vaesenclast	zmm0,zmm0,zmm30		; final round, key at rcx+160
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	; Keystream XOR input -> output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	vextracti32x4	xmm11,zmm5,1		; keep the unmasked last output block
	mov	r10,QWORD[120+rbp]		; r10 = output base pointer read from [rbp+120]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5	; masked store: only the valid bytes
	vmovdqu8	zmm5{k1}{z},zmm5		; zero the bytes outside the mask before hashing
	; Byte-shuffle the output blocks with zmm29 for the multiplication below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	vextracti32x4	xmm7,zmm21,1		; xmm7 = shuffled last (14th) block
	sub	r13,16 * (14 - 1)		; r13 = bytes in the last block


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_47



; Last block is complete: multiply all 14 blocks by the table entries
; starting at [rdx+128].
	sub	r13,16
	mov	QWORD[r8],0			; NOTE(review): presumably "no partial block pending" - confirm
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Merge with the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high/low halves, then XOR the
	; four 128-bit lanes down to one (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Two-step reduction of xmm0:xmm3 using the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_47
$L$_small_initial_partial_block_47:

; Last block is short (r13 < 16): record the leftover length at [r8],
; save the last output block at [rdx+16], and multiply only the first
; 13 blocks (table entries start one slot later, at [rdx+144]).
; NOTE(review): the values saved at [r8] and [rdx+16] are presumably
; consumed by a later call that completes the partial block - confirm.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01	; only block 13 (low lane of zmm21) is multiplied here
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Same POLY2 reduction as on the full-block path; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_47:

	or	r13,r13				; r13 == 0 after the full-block path
	je	NEAR $L$_after_reduction_47
	vpxorq	xmm14,xmm14,xmm7		; leftover bytes: XOR the unmultiplied last block into xmm14
$L$_after_reduction_47:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail handler: 15 blocks remain (3 x 64 bytes + up to 48 masked bytes).
; Same structure as the other tail handlers: build 15 counter blocks in
; zmm0/zmm3/zmm4/zmm5, run ten AES rounds with the round keys at
; [rcx+0 .. rcx+160], XOR with the input at [r9+r11], store to [r10+r11]
; (r10 loaded from [rbp+120]).  Interleaved carry-less products of
; [rsp+1024..1216] with [rsp+rbx..rsp+rbx+192] are folded into
; zmm24 (hi*hi), zmm25 (lo*lo) and zmm26 (cross terms).  Finally the
; byte-shuffled output blocks are multiplied by the table at [rdx+...],
; reduced with POLY2, and the result is left in xmm14.
; NOTE(review): [rdx+112..336] presumably holds precomputed hash-key
; powers; the [rsp+...] areas are filled outside this block - confirm.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_15_19:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192				; bytes beyond the three full 64-byte chunks
	kmovq	k1,[rax*8+r10]			; k1 = byte64_len_to_mask_table[r13 - 192]
	cmp	r15d,241			; 241 = 256 - 15; NOTE(review): r15d looks like the low counter byte - confirm
	jae	NEAR $L$_16_blocks_overflow_48
	; Fast path: plain dword adds on zmm2 with the increments in zmm28/zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_48

$L$_16_blocks_overflow_48:
	; Slow path: shuffle with zmm29, add the ddq_add_* constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_48:



; Round key 0 is broadcast to all lanes; zmm30/zmm31 alternate as key holders.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,2		; lane 2 of zmm5 = 15th counter block
	vshufi64x2	zmm2,zmm2,zmm2,0	; replicate it into every 128-bit lane of zmm2


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	vpxorq	zmm0,zmm0,zmm30			; initial key XOR (key at rcx+0)
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Products of [rsp+1024] with [rsp+rbx]: 0x11 hi*hi, 0x00 lo*lo, 0x01/0x10 cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31			; AES round with key at rcx+16
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Products of [rsp+1088] with [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+32
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Products of [rsp+1152] with [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+48
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; 0x96 = three-way XOR: merge the three product sets computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+64
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the 15 input blocks; the last 64-byte chunk is byte-masked by k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+80
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Products of [rsp+1216] with [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+96
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators zmm24 (hi), zmm25 (lo), zmm26 (mid).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+112
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+128
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+144
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30		; final round, key at rcx+160
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Keystream XOR input -> output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	vextracti32x4	xmm11,zmm5,2		; keep the unmasked last output block
	mov	r10,QWORD[120+rbp]		; r10 = output base pointer read from [rbp+120]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5	; masked store: only the valid bytes
	vmovdqu8	zmm5{k1}{z},zmm5		; zero the bytes outside the mask before hashing
	; Byte-shuffle the output blocks with zmm29 for the multiplication below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	vextracti32x4	xmm7,zmm21,2		; xmm7 = shuffled last (15th) block
	sub	r13,16 * (15 - 1)		; r13 = bytes in the last block


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_49



; Last block is complete: multiply all 15 blocks by the table entries
; starting at [rdx+112].
	sub	r13,16
	mov	QWORD[r8],0			; NOTE(review): presumably "no partial block pending" - confirm
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Assemble the last three table entries: 32 bytes at rdx+304 plus 16 bytes at rdx+336 into lane 2.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Merge with the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high/low halves, then XOR the
	; four 128-bit lanes down to one (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Two-step reduction of xmm0:xmm3 using the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_49
$L$_small_initial_partial_block_49:

; Last block is short (r13 < 16): record the leftover length at [r8],
; save the last output block at [rdx+16], and multiply only the first
; 14 blocks (table entries start one slot later, at [rdx+128]).
; NOTE(review): the values saved at [r8] and [rdx+16] are presumably
; consumed by a later call that completes the partial block - confirm.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01	; only blocks 13 and 14 (low 256 bits of zmm21) are multiplied here
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Same POLY2 reduction as on the full-block path; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_49:

	or	r13,r13				; r13 == 0 after the full-block path
	je	NEAR $L$_after_reduction_49
	vpxorq	xmm14,xmm14,xmm7		; leftover bytes: XOR the unmultiplied last block into xmm14
$L$_after_reduction_49:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail handler: 16 blocks remain (3 x 64 bytes + up to 64 masked bytes).
; Builds 16 counter blocks in zmm0/zmm3/zmm4/zmm5, runs ten AES rounds
; with the round keys at [rcx+0 .. rcx+160], XORs with the input at
; [r9+r11] and stores to [r10+r11] (r10 loaded from [rbp+120]).
; Interleaved carry-less products of [rsp+1024..1216] with
; [rsp+rbx..rsp+rbx+192] are folded into zmm24/zmm25/zmm26.
; Unlike the smaller tail cases there is no full/partial branch: the
; code always takes the "partial" route, multiplying the first 15 blocks
; by the table at [rdx+112..] and XORing the 16th block into xmm14
; unmultiplied.
; NOTE(review): [rdx+112..336] presumably holds precomputed hash-key
; powers; the [rsp+...] areas are filled outside this block - confirm.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_16_19:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192				; bytes beyond the three full 64-byte chunks
	kmovq	k1,[rax*8+r10]			; k1 = byte64_len_to_mask_table[r13 - 192]
	cmp	r15d,240			; 240 = 256 - 16; NOTE(review): r15d looks like the low counter byte - confirm
	jae	NEAR $L$_16_blocks_overflow_50
	; Fast path: plain dword adds on zmm2 with the increments in zmm28/zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_50

$L$_16_blocks_overflow_50:
	; Slow path: shuffle with zmm29, add the ddq_add_* constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_50:



; Round key 0 is broadcast to all lanes; zmm30/zmm31 alternate as key holders.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,3		; lane 3 of zmm5 = 16th counter block
	vshufi64x2	zmm2,zmm2,zmm2,0	; replicate it into every 128-bit lane of zmm2


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	vpxorq	zmm0,zmm0,zmm30			; initial key XOR (key at rcx+0)
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Products of [rsp+1024] with [rsp+rbx]: 0x11 hi*hi, 0x00 lo*lo, 0x01/0x10 cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31			; AES round with key at rcx+16
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Products of [rsp+1088] with [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+32
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Products of [rsp+1152] with [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+48
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; 0x96 = three-way XOR: merge the three product sets computed so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+64
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the 16 input blocks; the last 64-byte chunk is byte-masked by k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+80
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Products of [rsp+1216] with [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+96
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators zmm24 (hi), zmm25 (lo), zmm26 (mid).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+112
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+128
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+144
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30		; final round, key at rcx+160
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Keystream XOR input -> output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	vextracti32x4	xmm11,zmm5,3		; keep the unmasked last output block
	mov	r10,QWORD[120+rbp]		; r10 = output base pointer read from [rbp+120]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5	; masked store: only the valid bytes
	vmovdqu8	zmm5{k1}{z},zmm5		; zero the bytes outside the mask before hashing
	; Byte-shuffle the output blocks with zmm29 for the multiplication below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	vextracti32x4	xmm7,zmm21,3		; xmm7 = shuffled last (16th) block
	sub	r13,16 * (16 - 1)		; r13 = bytes in the last block
$L$_small_initial_partial_block_51:

; Record the last-block length at [r8], save the last output block at
; [rdx+16], and multiply the first 15 blocks by the table entries
; starting at [rdx+112].
; NOTE(review): the values saved at [r8] and [rdx+16] are presumably
; consumed by a later call - confirm.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Assemble the last three table entries: 32 bytes at rdx+304 plus 16 bytes at rdx+336 into lane 2.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Merge with the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high/low halves, then XOR the
	; four 128-bit lanes down to one (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Two-step reduction of xmm0:xmm3 using the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_51:
	vpxorq	xmm14,xmm14,xmm7		; unconditionally XOR the unmultiplied 16th block into xmm14
$L$_after_reduction_51:
	jmp	NEAR $L$_last_blocks_done_19
; ----------------------------------------------------------------------
; Tail handler: no blocks remain.  No AES work is done here; the code
; only multiplies the four 64-byte values at [rsp+1024..1216] by the
; four at [rsp+rbx..rsp+rbx+192], folds the products into the running
; accumulators zmm24 (hi*hi), zmm25 (lo*lo) and zmm26 (cross terms),
; and reduces the total with POLY2 into xmm14.
; NOTE(review): the [rsp+...] areas presumably hold earlier
; byte-shuffled blocks and the matching hash-key powers - confirm.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_0_19:
	vmovdqa64	zmm13,ZMMWORD[1024+rsp]
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11	; hi*hi
	vpclmulqdq	zmm3,zmm13,zmm12,0x00	; lo*lo
	vpclmulqdq	zmm4,zmm13,zmm12,0x01	; cross terms
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1088+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; 0x96 = three-way XOR into the accumulators (zmm26 receives both cross terms).
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[1152+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1216+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle term across the high/low halves, then XOR the
	; four 128-bit lanes down to one (512 -> 256 -> 128 bits).
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm4,XMMWORD[POLY2]

; Two-step reduction of xmm24:xmm25 using the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Common exit of the "_19" tail handlers: byte-shuffle the counter block
; in xmm2 with the mask in xmm29, then continue at $L$_ghash_done_10.
$L$_last_blocks_done_19:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_10
; ----------------------------------------------------------------------
; First half of the 32-block path: processes 16 blocks (256 bytes).
; Builds 16 counter blocks in zmm0/zmm3/zmm4/zmm5, runs ten AES rounds
; with the round keys at [rcx+0 .. rcx+160], XORs with the input at
; [r9+r11+0..255] and stores to [r10+r11+0..255] (r10 from [rbp+120]).
; Interleaved with the AES rounds, the four 64-byte values at
; [rsp+768..960] (the first one XORed with zmm14) are multiplied by the
; values at [rsp+0..192]; the products START new accumulators
; zmm24 (hi*hi), zmm25 (lo*lo) and zmm26 (cross terms).
; The byte-shuffled output blocks are stashed at [rsp+1280..1472].
; NOTE(review): zmm14 is presumably the current hash value and
; [rsp+0..] the hash-key powers - set up outside this block; confirm.
; ----------------------------------------------------------------------
$L$_encrypt_32_blocks_10:
	cmp	r15b,240			; 240 = 256 - 16; NOTE(review): r15b looks like the low counter byte - confirm
	jae	NEAR $L$_16_blocks_overflow_52
	; Fast path: plain dword adds on zmm2 with the increments in zmm28/zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_52
$L$_16_blocks_overflow_52:
	; Slow path: shuffle with zmm29, add the ddq_add_* constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_52:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]	; zmm8 = zmm14 XOR first stashed 64 bytes
	vmovdqu64	zmm1,ZMMWORD[rsp]



; Broadcast lane 3 of zmm5 (the 16th counter block) to all lanes of zmm2.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	vpxorq	zmm0,zmm0,zmm30			; initial key XOR (key at rcx+0)
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of (zmm14 ^ [rsp+768]) with [rsp+0]: 0x11 hi*hi, 0x00 lo*lo, 0x01/0x10 cross.
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	vaesenc	zmm0,zmm0,zmm31			; AES round with key at rcx+16
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of [rsp+832] with [rsp+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30			; key at rcx+32
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of [rsp+896] with [rsp+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31			; key at rcx+48
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; 0x96 = three-way XOR: merge the three product sets computed so far.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30			; key at rcx+64
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]


; Load 256 bytes of input.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31			; key at rcx+80
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of [rsp+960] with [rsp+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30			; key at rcx+96
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Initialise (not accumulate into) zmm26 (mid), zmm24 (hi), zmm25 (lo).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+112
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+128
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+144
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30		; final round, key at rcx+160
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30


; Keystream XOR input -> output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21


; Store 256 bytes of output, then stash the byte-shuffled copies at [rsp+1280..1472].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	; ------------------------------------------------------------------
	; Second half of the 32-block path: processes the next 16 blocks
	; (input/output offsets 256..511 from r11).  While the AES rounds
	; run, [rsp+1024..1216] is multiplied by [rsp+256..448] and folded
	; into zmm24/zmm25/zmm26.  The byte-shuffled output is stashed at
	; [rsp+768..960].  Afterwards [rsp+1280..1472] is multiplied by
	; [rsp+512..704], and the total is reduced with POLY2 into xmm14.
	; NOTE(review): [rsp+256..704] presumably holds hash-key powers
	; prepared outside this block - confirm.
	; ------------------------------------------------------------------
	cmp	r15b,240			; 240 = 256 - 16; NOTE(review): r15b looks like the low counter byte - confirm
	jae	NEAR $L$_16_blocks_overflow_53
	; Fast path: plain dword adds on zmm2 with the increments in zmm28/zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_53
$L$_16_blocks_overflow_53:
	; Slow path: shuffle with zmm29, add the ddq_add_* constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_53:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[256+rsp]



; Broadcast lane 3 of zmm5 (the 16th counter block) to all lanes of zmm2.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[320+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]







	vpxorq	zmm0,zmm0,zmm30			; initial key XOR (key at rcx+0)
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products of [rsp+1024] with [rsp+256]: 0x11 hi*hi, 0x00 lo*lo, 0x01/0x10 cross.
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[384+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]



	vaesenc	zmm0,zmm0,zmm31			; AES round with key at rcx+16
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products of [rsp+1088] with [rsp+320].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[448+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]



	vaesenc	zmm0,zmm0,zmm30			; key at rcx+32
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products of [rsp+1152] with [rsp+384].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31			; key at rcx+48
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; 0x96 = three-way XOR: merge the three product sets computed so far.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30			; key at rcx+64
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]


; Load the next 256 bytes of input (offsets 256..511).
	vmovdqu8	zmm17,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[448+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31			; key at rcx+80
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products of [rsp+1216] with [rsp+448].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30			; key at rcx+96
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Accumulate into zmm26 (mid), zmm24 (hi), zmm25 (lo).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vpternlogq	zmm24,zmm6,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+112
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30			; key at rcx+128
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31			; key at rcx+144
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30		; final round, key at rcx+160
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30


; Keystream XOR input -> output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21


; Store 256 bytes of output, then stash the byte-shuffled copies at [rsp+768..960].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm0
	vmovdqa64	ZMMWORD[832+rsp],zmm3
	vmovdqa64	ZMMWORD[896+rsp],zmm4
	vmovdqa64	ZMMWORD[960+rsp],zmm5
	; Multiply the 16 blocks stashed at [rsp+1280..1472] by [rsp+512..704].
	vmovdqa64	zmm13,ZMMWORD[1280+rsp]
	vmovdqu64	zmm12,ZMMWORD[512+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1344+rsp]
	vmovdqu64	zmm12,ZMMWORD[576+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[1408+rsp]
	vmovdqu64	zmm12,ZMMWORD[640+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1472+rsp]
	vmovdqu64	zmm12,ZMMWORD[704+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle term across the high/low halves, then XOR the
	; four 128-bit lanes down to one (512 -> 256 -> 128 bits).
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm4,XMMWORD[POLY2]

; Two-step reduction of xmm24:xmm25 using the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

	; Account for 512 bytes: shrink the remaining length (r13) and advance
	; the data offset (r11).
	sub	r13,512
	add	r11,512
	; ebx = 512 - (r13 rounded down to a multiple of 16); the tail handlers
	; use it as an offset from rsp.
	mov	r10d,r13d
	and	r10d,~15
	mov	ebx,512
	sub	ebx,r10d
	; r10d = (r13 + 15) >> 4 = number of 16-byte blocks left, counting a
	; trailing partial block as one. Zero goes straight to the "0" handler.
	mov	r10d,r13d
	add	r10d,15
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_54

	; Compare tree selecting the handler for 1..16 remaining blocks.
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_54
	jb	NEAR $L$_last_num_blocks_is_7_1_54


	; 9..16 blocks
	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_54
	jb	NEAR $L$_last_num_blocks_is_11_9_54


	; 13..16 blocks
	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_54
	ja	NEAR $L$_last_num_blocks_is_16_54
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_54
	jmp	NEAR $L$_last_num_blocks_is_13_54

$L$_last_num_blocks_is_11_9_54:

	; 9..11 blocks
	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_54
	ja	NEAR $L$_last_num_blocks_is_11_54
	jmp	NEAR $L$_last_num_blocks_is_9_54

$L$_last_num_blocks_is_7_1_54:
	; 1..7 blocks
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_54
	jb	NEAR $L$_last_num_blocks_is_3_1_54

	; 5..7 blocks
	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_54
	je	NEAR $L$_last_num_blocks_is_6_54
	jmp	NEAR $L$_last_num_blocks_is_5_54

$L$_last_num_blocks_is_3_1_54:

	; 1..3 blocks; a count of 1 falls through to the code that follows.
	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_54
	je	NEAR $L$_last_num_blocks_is_2_54
$L$_last_num_blocks_is_1_54:
	; Tail handler for one remaining block (possibly shorter than 16 bytes).
	; The last counter block goes through the AES rounds while the 16
	; byte-shuffled blocks buffered at [768+rsp]..[1023+rsp] are carry-less
	; multiplied by the values at [rbx+rsp]..[255+rbx+rsp]. Partial products
	; end up in zmm24 (high), zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the remaining length, table indexed by r13.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Counter increment: direct add of xmm28 unless r15d >= 255.
	; NOTE(review): r15d presumably tracks the low counter byte so a carry
	; out of it takes the slow path -- confirm.
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_55
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_55

$L$_16_blocks_overflow_55:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234, then
	; shuffle the new counter block back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
$L$_16_blocks_ok_55:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 0 of zmm0 (the counter block used here) replicated in zmm2.
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the input bytes at r9 + r11.
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	xmm0,xmm0,xmm30
	; XOR the keystream with the input; xmm11 keeps the un-masked result.
	vpxorq	xmm0,xmm0,xmm17
	vextracti32x4	xmm11,zmm0,0
	; r10 = destination base read from [120+rbp]; masked store of the result.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	; xmm7 = shuffled last block, used on the partial-block path.
	vextracti32x4	xmm7,zmm17,0


	; Fewer than 16 bytes left: take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_56





	; Whole block: write 0 to [r8], multiply the shuffled block by the value
	; at [336+rdx] and fold the products into the accumulators.
	; NOTE(review): [r8] looks like a stored partial-block length and
	; [336+rdx] like a hash-key power in the context -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_56
$L$_small_initial_partial_block_56:








	; Partial block: store the byte count at [r8] and xmm11 at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


	; Only the 16 buffered blocks are reduced here; the partial block is
	; not multiplied on this path.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm0,XMMWORD[POLY2]


	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












	; XOR the shuffled, zero-padded partial block into xmm14.
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_56
$L$_small_initial_compute_done_56:
$L$_after_reduction_56:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_2_54:
	; Tail handler for two remaining blocks (the second may be partial).
	; Two counter blocks (ymm0) go through the AES rounds while the 16
	; byte-shuffled blocks buffered at [768+rsp]..[1023+rsp] are carry-less
	; multiplied by the values at [rbx+rsp]..[255+rbx+rsp]; partial products
	; end up in zmm24 (high), zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the remaining length, table indexed by r13.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Direct counter add unless r15d >= 254.
	; NOTE(review): r15d presumably tracks the low counter byte -- confirm.
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_57
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_57

$L$_16_blocks_overflow_57:
	; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_57:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 1 of zmm0 (the last counter block used) replicated in zmm2.
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the input bytes at r9 + r11.
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	ymm0,ymm0,ymm30
	; XOR the keystream with the input; xmm11 keeps the un-masked last block.
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm11,zmm0,1
	; r10 = destination base read from [120+rbp]; masked store of the result.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	; xmm7 = shuffled last block, used if that block is partial.
	vextracti32x4	xmm7,zmm17,1
	; r13 = bytes belonging to the last block only.
	sub	r13,16 * (2 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_58





	; Last block is whole: write 0 to [r8] and multiply both shuffled blocks
	; by the two values at [320+rdx].
	; NOTE(review): [r8] looks like a stored partial-block length and the
	; rdx-relative values like hash-key powers -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_58
$L$_small_initial_partial_block_58:








	; Last block is partial: store its byte count at [r8] and xmm11 at
	; [16+rdx]; only the first (whole) block is multiplied, by [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_58:

	; r13 != 0 only when the last block was partial: XOR the shuffled,
	; zero-padded partial block into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_58
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_58:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_3_54:
	; Tail handler for three remaining blocks (the third may be partial).
	; The counter blocks in zmm0 go through the AES rounds while the 16
	; byte-shuffled blocks buffered at [768+rsp]..[1023+rsp] are carry-less
	; multiplied by the values at [rbx+rsp]..[255+rbx+rsp]; partial products
	; end up in zmm24 (high), zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the remaining length, table indexed by r13.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Direct counter add unless r15d >= 253.
	; NOTE(review): r15d presumably tracks the low counter byte -- confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_59
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_59

$L$_16_blocks_overflow_59:
	; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_59:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 2 of zmm0 (the last counter block used) replicated in zmm2.
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the input bytes at r9 + r11.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	zmm0,zmm0,zmm30
	; XOR the keystream with the input; xmm11 keeps the un-masked last block.
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,2
	; r10 = destination base read from [120+rbp]; masked store of the result.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = shuffled last block, used if that block is partial.
	vextracti32x4	xmm7,zmm17,2
	; r13 = bytes belonging to the last block only.
	sub	r13,16 * (3 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_60





	; Last block is whole: write 0 to [r8] and multiply all three shuffled
	; blocks by the three values at [304+rdx]..[351+rdx].
	; NOTE(review): [r8] looks like a stored partial-block length and the
	; rdx-relative values like hash-key powers -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_60
$L$_small_initial_partial_block_60:








	; Last block is partial: store its byte count at [r8] and xmm11 at
	; [16+rdx]; only the first two (whole) blocks are multiplied, by the
	; values at [320+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_60:

	; r13 != 0 only when the last block was partial: XOR the shuffled,
	; zero-padded partial block into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_60
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_60:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_4_54:
	; Tail handler for four remaining blocks (the fourth may be partial).
	; The counter blocks in zmm0 go through the AES rounds while the 16
	; byte-shuffled blocks buffered at [768+rsp]..[1023+rsp] are carry-less
	; multiplied by the values at [rbx+rsp]..[255+rbx+rsp]; partial products
	; end up in zmm24 (high), zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the remaining length, table indexed by r13.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Direct counter add unless r15d >= 252.
	; NOTE(review): r15d presumably tracks the low counter byte -- confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_61
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_61

$L$_16_blocks_overflow_61:
	; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_61:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 3 of zmm0 (the last counter block used) replicated in zmm2.
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the input bytes at r9 + r11.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	zmm0,zmm0,zmm30
	; XOR the keystream with the input; xmm11 keeps the un-masked last block.
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,3
	; r10 = destination base read from [120+rbp]; masked store of the result.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = shuffled last block, used if that block is partial.
	vextracti32x4	xmm7,zmm17,3
	; r13 = bytes belonging to the last block only.
	sub	r13,16 * (4 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_62





	; Last block is whole: write 0 to [r8] and multiply all four shuffled
	; blocks by the four values at [288+rdx].
	; NOTE(review): [r8] looks like a stored partial-block length and the
	; rdx-relative values like hash-key powers -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_62
$L$_small_initial_partial_block_62:








	; Last block is partial: store its byte count at [r8] and xmm11 at
	; [16+rdx]; only the first three (whole) blocks are multiplied, by the
	; values at [304+rdx]..[351+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_62:

	; r13 != 0 only when the last block was partial: XOR the shuffled,
	; zero-padded partial block into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_62
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_62:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_5_54:
	; Tail handler for five remaining blocks (the fifth may be partial).
	; Counter blocks 1-4 live in zmm0 and block 5 in xmm3. They go through
	; the AES rounds while the 16 byte-shuffled blocks buffered at
	; [768+rsp]..[1023+rsp] are carry-less multiplied by the values at
	; [rbx+rsp]..[255+rbx+rsp]; partial products end up in zmm24 (high),
	; zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the bytes beyond the first 64, indexed by r13 - 64.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; Direct counter add unless r15d >= 251.
	; NOTE(review): r15d presumably tracks the low counter byte, and zmm27
	; presumably matches ddq_add_4444 in the fast-path format -- confirm.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_63
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_63

$L$_16_blocks_overflow_63:
	; Slow path: shuffle with zmm29, add ddq_add_1234 (and ddq_add_4444 for
	; the second register), shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_63:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 0 of zmm3 (the last counter block used) replicated in zmm2.
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Input: first 64 bytes unmasked, the rest masked and zero-filled.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	; XOR the keystream with the input; xmm11 keeps the un-masked last block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	vextracti32x4	xmm11,zmm3,0
	; r10 = destination base read from [120+rbp]; the second store is masked.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	; xmm7 = shuffled last block, used if that block is partial.
	vextracti32x4	xmm7,zmm19,0
	; r13 = bytes belonging to the last block only.
	sub	r13,16 * (5 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_64





	; Last block is whole: write 0 to [r8]; blocks 1-4 are multiplied by the
	; values at [272+rdx] and block 5 by the value at [336+rdx].
	; NOTE(review): [r8] looks like a stored partial-block length and the
	; rdx-relative values like hash-key powers -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; Merge both product sets with the accumulators.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_64
$L$_small_initial_partial_block_64:








	; Last block is partial: store its byte count at [r8] and xmm11 at
	; [16+rdx]; only the first four (whole) blocks are multiplied, by the
	; values at [288+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_64:

	; r13 != 0 only when the last block was partial: XOR the shuffled,
	; zero-padded partial block into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_64
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_64:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_6_54:
	; Tail handler for six remaining blocks (the sixth may be partial).
	; Counter blocks 1-4 live in zmm0 and blocks 5-6 in ymm3. They go
	; through the AES rounds while the 16 byte-shuffled blocks buffered at
	; [768+rsp]..[1023+rsp] are carry-less multiplied by the values at
	; [rbx+rsp]..[255+rbx+rsp]; partial products end up in zmm24 (high),
	; zmm25 (low) and zmm26 (middle).
	; NOTE(review): [rbx+rsp] presumably holds hash-key powers and zmm14 the
	; running GHASH value -- confirm against the code that sets them up.

	; k1 = byte mask for the bytes beyond the first 64, indexed by r13 - 64.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; Direct counter add unless r15d >= 250.
	; NOTE(review): r15d presumably tracks the low counter byte, and zmm27
	; presumably matches ddq_add_4444 in the fast-path format -- confirm.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_65
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_65

$L$_16_blocks_overflow_65:
	; Slow path: shuffle with zmm29, add ddq_add_1234 (and ddq_add_4444 for
	; the second register), shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_65:




	; zmm30/zmm31 alternate as holders of the round keys broadcast from
	; [rcx + 16*i]. zmm8 = zmm14 XOR first buffered ZMM of blocks.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep lane 1 of zmm3 (the last counter block used) replicated in zmm2.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at [rcx]).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for buffered blocks 0-3.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for buffered blocks 4-7.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for buffered blocks 8-11.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the like-kind partial products so far.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Input: first 64 bytes unmasked, the rest masked and zero-filled.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for buffered blocks 12-15.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final accumulation: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	; Last round uses the key at [160+rcx] (9 vaesenc + 1 vaesenclast).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	; XOR the keystream with the input; xmm11 keeps the un-masked last block.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm11,zmm3,1
	; r10 = destination base read from [120+rbp]; the second store is masked.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
	; Zero the bytes outside k1, then shuffle with zmm29 for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	; xmm7 = shuffled last block, used if that block is partial.
	vextracti32x4	xmm7,zmm19,1
	; r13 = bytes belonging to the last block only.
	sub	r13,16 * (6 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_66





	; Last block is whole: write 0 to [r8]; blocks 1-4 are multiplied by the
	; values at [256+rdx] and blocks 5-6 by the values at [320+rdx].
	; NOTE(review): [r8] looks like a stored partial-block length and the
	; rdx-relative values like hash-key powers -- confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; Merge both product sets with the accumulators.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce high:low (xmm0:xmm3) with the constant POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_66
$L$_small_initial_partial_block_66:








	; Last block is partial: store its byte count at [r8] and xmm11 at
	; [16+rdx]; only the first five (whole) blocks are multiplied: blocks
	; 1-4 by the values at [272+rdx], block 5 by the value at [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_66:

	; r13 != 0 only when the last block was partial: XOR the shuffled,
	; zero-padded partial block into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_66
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_66:
	jmp	NEAR $L$_last_blocks_done_54
; ----------------------------------------------------------------------
; Tail handler entered at $L$_last_num_blocks_is_7_54.
; Builds counter vectors in zmm0/zmm3, runs them through 9 vaesenc rounds
; plus vaesenclast with 16-byte round keys broadcast from [rcx+0..rcx+160],
; XORs the result with input read from [r9+r11] and stores it at [r10+r11]
; (r10 reloaded from [rbp+120]).  The second 64-byte vector is loaded and
; stored under byte mask k1.
; Interleaved vpclmulqdq instructions multiply data kept at [rsp+768..960]
; by values at [rsp+rbx+0..192]; their sums are parked in zmm24/zmm25/zmm26
; and later merged with the products of the freshly produced blocks.
; The final 128-bit result is left in xmm14.
; NOTE(review): register roles are inferred from usage and label names only
; (r13 = remaining byte count, r15d = low counter byte, rcx = AES key
; schedule, rdx = context holding the multiplier table, r8 = partial-length
; slot) - confirm against the generator script.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_7_54:
	; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 249 selects the slow counter path below
	; (presumably the low counter byte would wrap when 7 is added - TODO confirm).
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_67
	; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27 (zmm27/zmm28 are set up
	; outside this section).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_67

$L$_16_blocks_overflow_67:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_67:




	; zmm30 = round key 0; zmm8 = zmm14 XOR [rsp+768]; zmm1 = first multiplier.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = 128-bit lane 2 of zmm3 replicated into all four lanes.
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	; Initial key XOR; zmm30/zmm31 alternate as holders of the next round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four carry-less products (hi*hi, lo*lo and both cross terms) of zmm8 x zmm1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products of [rsp+832] x [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products of [rsp+896] x [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) accumulates the matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the next 64 bytes under k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products of [rsp+960] x [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Park the accumulated high / low / middle sums in zmm24 / zmm25 / zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	; Final round uses the key at [rcx+160].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; XOR the encrypted counters with the input loaded above.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = lane 2 of the unmasked result; it is stored to [rdx+16] on the
	; partial-block path below.
	vextracti32x4	xmm11,zmm3,2
	; r10 = destination pointer read from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes of zmm3 not selected by k1 before they are multiplied.
	vmovdqu8	zmm3{k1}{z},zmm3
	; Byte-shuffle the produced blocks with zmm29; xmm7 = shuffled lane 2 of zmm19.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,2
	; r13 -= 96 (six 16-byte blocks).
	sub	r13,16 * (7 - 1)


	; Signed compare: fewer than 16 left over -> partial last block.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_68





	; Complete last block: r13 -= 16, store 0 at [r8], and multiply all produced
	; lanes by the table values at [rdx+240 .. rdx+351].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; zmm1 lanes 0-1 from [rdx+304], lane 2 from [rdx+336].
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant (presumably the GHASH reduction).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_68
$L$_small_initial_partial_block_68:








	; Partial last block: store r13 at [r8] and xmm11 at [rdx+16], then multiply
	; only the leading lanes (ymm width for the second vector) by the table
	; values at [rdx+256 .. rdx+351].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same middle-term split and lane-wise XOR down to xmm0 / xmm3 as above.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_68:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_68
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_68:
	jmp	NEAR $L$_last_blocks_done_54
; ----------------------------------------------------------------------
; Tail handler entered at $L$_last_num_blocks_is_8_54.
; Builds counter vectors in zmm0/zmm3, runs them through 9 vaesenc rounds
; plus vaesenclast with 16-byte round keys broadcast from [rcx+0..rcx+160],
; XORs the result with input read from [r9+r11] and stores it at [r10+r11]
; (r10 reloaded from [rbp+120]).  The second 64-byte vector is loaded and
; stored under byte mask k1.
; Interleaved vpclmulqdq instructions multiply data kept at [rsp+768..960]
; by values at [rsp+rbx+0..192]; their sums are parked in zmm24/zmm25/zmm26
; and later merged with the products of the freshly produced blocks.
; The final 128-bit result is left in xmm14.
; NOTE(review): register roles are inferred from usage and label names only
; (r13 = remaining byte count, r15d = low counter byte, rcx = AES key
; schedule, rdx = context holding the multiplier table, r8 = partial-length
; slot) - confirm against the generator script.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_8_54:
	; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 248 selects the slow counter path below
	; (presumably the low counter byte would wrap when 8 is added - TODO confirm).
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_69
	; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_69

$L$_16_blocks_overflow_69:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_69:




	; zmm30 = round key 0; zmm8 = zmm14 XOR [rsp+768]; zmm1 = first multiplier.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = 128-bit lane 3 of zmm3 replicated into all four lanes.
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	; Initial key XOR; zmm30/zmm31 alternate as holders of the next round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four carry-less products (hi*hi, lo*lo and both cross terms) of zmm8 x zmm1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products of [rsp+832] x [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products of [rsp+896] x [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) accumulates the matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the next 64 bytes under k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products of [rsp+960] x [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Park the accumulated high / low / middle sums in zmm24 / zmm25 / zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	; Final round uses the key at [rcx+160].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; XOR the encrypted counters with the input loaded above.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = lane 3 of the unmasked result; it is stored to [rdx+16] on the
	; partial-block path below.
	vextracti32x4	xmm11,zmm3,3
	; r10 = destination pointer read from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes of zmm3 not selected by k1 before they are multiplied.
	vmovdqu8	zmm3{k1}{z},zmm3
	; Byte-shuffle the produced blocks with zmm29; xmm7 = shuffled lane 3 of zmm19.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,3
	; r13 -= 112 (seven 16-byte blocks).
	sub	r13,16 * (8 - 1)


	; Signed compare: fewer than 16 left over -> partial last block.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_70





	; Complete last block: r13 -= 16, store 0 at [r8], and multiply both
	; produced vectors by the table values at [rdx+224 .. rdx+351].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle term across the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant (presumably the GHASH reduction).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_70
$L$_small_initial_partial_block_70:








	; Partial last block: store r13 at [r8] and xmm11 at [rdx+16], then multiply
	; by the table values at [rdx+240 .. rdx+351].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; zmm1 lanes 0-1 from [rdx+304], lane 2 from [rdx+336].
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same middle-term split and lane-wise XOR down to xmm0 / xmm3 as above.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_70:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_70
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_70:
	jmp	NEAR $L$_last_blocks_done_54
; ----------------------------------------------------------------------
; Tail handler entered at $L$_last_num_blocks_is_9_54.
; Builds counter vectors in zmm0/zmm3 plus a single 128-bit counter in xmm4,
; runs them through 9 vaesenc rounds plus vaesenclast with 16-byte round
; keys broadcast from [rcx+0..rcx+160], XORs the result with input read from
; [r9+r11] and stores it at [r10+r11] (r10 reloaded from [rbp+120]).  The
; third vector (offset 128) is loaded and stored under byte mask k1.
; Interleaved vpclmulqdq instructions multiply data kept at [rsp+768..960]
; by values at [rsp+rbx+0..192]; their sums are parked in zmm24/zmm25/zmm26
; and later merged with the products of the freshly produced blocks.
; The final 128-bit result is left in xmm14.
; NOTE(review): register roles are inferred from usage and label names only
; (r13 = remaining byte count, r15d = low counter byte, rcx = AES key
; schedule, rdx = context holding the multiplier table, r8 = partial-length
; slot) - confirm against the generator script.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_9_54:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 247 selects the slow counter path below
	; (presumably the low counter byte would wrap when 9 is added - TODO confirm).
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_71
	; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, xmm4 = xmm3 + xmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_71

$L$_16_blocks_overflow_71:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29
	; (only the low 128 bits of zmm4 are shuffled back).
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_71:




	; zmm30 = round key 0; zmm8 = zmm14 XOR [rsp+768]; zmm1 = first multiplier.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = 128-bit lane 0 of zmm4 replicated into all four lanes.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	; Initial key XOR; zmm30/zmm31 alternate as holders of the next round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four carry-less products (hi*hi, lo*lo and both cross terms) of zmm8 x zmm1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products of [rsp+832] x [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products of [rsp+896] x [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) accumulates the matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, then up to 16 bytes under k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products of [rsp+960] x [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Park the accumulated high / low / middle sums in zmm24 / zmm25 / zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	; Final round uses the key at [rcx+160].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	; XOR the encrypted counters with the input loaded above.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
	; xmm11 = lane 0 of the unmasked result; it is stored to [rdx+16] on the
	; partial-block path below.
	vextracti32x4	xmm11,zmm4,0
	; r10 = destination pointer read from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
	; Zero the bytes of zmm4 not selected by k1 before they are multiplied.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the produced blocks with zmm29; xmm7 = shuffled lane 0 of zmm20.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	vextracti32x4	xmm7,zmm20,0
	; r13 -= 128 (eight 16-byte blocks).
	sub	r13,16 * (9 - 1)


	; Signed compare: fewer than 16 left over -> partial last block.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_72





	; Complete last block: r13 -= 16, store 0 at [r8], and multiply all three
	; produced vectors by the table values at [rdx+208 .. rdx+351].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the first two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third vector is a single block: xmm-width products with [rdx+336].
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant (presumably the GHASH reduction).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_72
$L$_small_initial_partial_block_72:








	; Partial last block: store r13 at [r8] and xmm11 at [rdx+16], then multiply
	; only the first two vectors by the table values at [rdx+224 .. rdx+351].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Same middle-term split and lane-wise XOR down to xmm0 / xmm3 as above.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_72:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_72
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_72:
	jmp	NEAR $L$_last_blocks_done_54
; ----------------------------------------------------------------------
; Tail handler entered at $L$_last_num_blocks_is_10_54.
; Builds counter vectors in zmm0/zmm3 plus a 256-bit pair in ymm4, runs
; them through 9 vaesenc rounds plus vaesenclast with 16-byte round keys
; broadcast from [rcx+0..rcx+160], XORs the result with input read from
; [r9+r11] and stores it at [r10+r11] (r10 reloaded from [rbp+120]).  The
; third vector (offset 128) is loaded and stored under byte mask k1.
; Interleaved vpclmulqdq instructions multiply data kept at [rsp+768..960]
; by values at [rsp+rbx+0..192]; their sums are parked in zmm24/zmm25/zmm26
; and later merged with the products of the freshly produced blocks.
; The final 128-bit result is left in xmm14.
; NOTE(review): register roles are inferred from usage and label names only
; (r13 = remaining byte count, r15d = low counter byte, rcx = AES key
; schedule, rdx = context holding the multiplier table, r8 = partial-length
; slot) - confirm against the generator script.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_10_54:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 246 selects the slow counter path below
	; (presumably the low counter byte would wrap when 10 is added - TODO confirm).
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_73
	; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, ymm4 = ymm3 + ymm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_73

$L$_16_blocks_overflow_73:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29
	; (only the low 256 bits of zmm4 are shuffled back).
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_73:




	; zmm30 = round key 0; zmm8 = zmm14 XOR [rsp+768]; zmm1 = first multiplier.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = 128-bit lane 1 of zmm4 replicated into all four lanes.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	; Initial key XOR; zmm30/zmm31 alternate as holders of the next round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four carry-less products (hi*hi, lo*lo and both cross terms) of zmm8 x zmm1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products of [rsp+832] x [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products of [rsp+896] x [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) accumulates the matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, then up to 32 bytes under k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products of [rsp+960] x [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Park the accumulated high / low / middle sums in zmm24 / zmm25 / zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	; Final round uses the key at [rcx+160].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
	; XOR the encrypted counters with the input loaded above.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
	; xmm11 = lane 1 of the unmasked result; it is stored to [rdx+16] on the
	; partial-block path below.
	vextracti32x4	xmm11,zmm4,1
	; r10 = destination pointer read from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
	; Zero the bytes of zmm4 not selected by k1 before they are multiplied.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the produced blocks with zmm29; xmm7 = shuffled lane 1 of zmm20.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	vextracti32x4	xmm7,zmm20,1
	; r13 -= 144 (nine 16-byte blocks).
	sub	r13,16 * (10 - 1)


	; Signed compare: fewer than 16 left over -> partial last block.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_74





	; Complete last block: r13 -= 16, store 0 at [r8], and multiply all three
	; produced vectors by the table values at [rdx+192 .. rdx+351].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the first two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third vector holds two blocks: ymm-width products with [rdx+320].
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant (presumably the GHASH reduction).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_74
$L$_small_initial_partial_block_74:








	; Partial last block: store r13 at [r8] and xmm11 at [rdx+16], then multiply
	; by the table values at [rdx+208 .. rdx+351]; the third vector contributes
	; only its first block (xmm width).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the first two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same middle-term split and lane-wise XOR down to xmm0 / xmm3 as above.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_74:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_74
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_74:
	jmp	NEAR $L$_last_blocks_done_54
; ----------------------------------------------------------------------
; Tail handler entered at $L$_last_num_blocks_is_11_54.
; Builds counter vectors in zmm0/zmm3/zmm4, runs them through 9 vaesenc
; rounds plus vaesenclast with 16-byte round keys broadcast from
; [rcx+0..rcx+160], XORs the result with input read from [r9+r11] and stores
; it at [r10+r11] (r10 reloaded from [rbp+120]).  The third 64-byte vector
; (offset 128) is loaded and stored under byte mask k1.
; Interleaved vpclmulqdq instructions multiply data kept at [rsp+768..960]
; by values at [rsp+rbx+0..192]; their sums are parked in zmm24/zmm25/zmm26
; and later merged with the products of the freshly produced blocks.
; The final 128-bit result is left in xmm14.
; NOTE(review): register roles are inferred from usage and label names only
; (r13 = remaining byte count, r15d = low counter byte, rcx = AES key
; schedule, rdx = context holding the multiplier table, r8 = partial-length
; slot) - confirm against the generator script.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_11_54:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 245 selects the slow counter path below
	; (presumably the low counter byte would wrap when 11 is added - TODO confirm).
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_75
	; Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, zmm4 = zmm3 + zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_75

$L$_16_blocks_overflow_75:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_75:




	; zmm30 = round key 0; zmm8 = zmm14 XOR [rsp+768]; zmm1 = first multiplier.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = 128-bit lane 2 of zmm4 replicated into all four lanes.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	; Initial key XOR; zmm30/zmm31 alternate as holders of the next round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four carry-less products (hi*hi, lo*lo and both cross terms) of zmm8 x zmm1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products of [rsp+832] x [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products of [rsp+896] x [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) accumulates the matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, then up to 64 bytes under k1 (zeroing).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products of [rsp+960] x [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Park the accumulated high / low / middle sums in zmm24 / zmm25 / zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; Final round uses the key at [rcx+160].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the encrypted counters with the input loaded above.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = lane 2 of the unmasked result; it is stored to [rdx+16] on the
	; partial-block path below.
	vextracti32x4	xmm11,zmm4,2
	; r10 = destination pointer read from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 not selected by k1 before they are multiplied.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the produced blocks with zmm29; xmm7 = shuffled lane 2 of zmm20.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,2
	; r13 -= 160 (ten 16-byte blocks).
	sub	r13,16 * (11 - 1)


	; Signed compare: fewer than 16 left over -> partial last block.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_76





	; Complete last block: r13 -= 16, store 0 at [r8], and multiply all three
	; produced vectors by the table values at [rdx+176 .. rdx+351].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the first two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; zmm1 lanes 0-1 from [rdx+304], lane 2 from [rdx+336].
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle term across the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant (presumably the GHASH reduction).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_76
$L$_small_initial_partial_block_76:








	; Partial last block: store r13 at [r8] and xmm11 at [rdx+16], then multiply
	; by the table values at [rdx+192 .. rdx+351]; the third vector contributes
	; only its first two blocks (ymm width).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the first two product sets term by term.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Merge with the sums parked in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same middle-term split and lane-wise XOR down to xmm0 / xmm3 as above.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Fold step 1 with the POLY2 constant.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Fold step 2; xmm14 = final 128-bit value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_76:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	or	r13,r13
	je	NEAR $L$_after_reduction_76
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_76:
	jmp	NEAR $L$_last_blocks_done_54
$L$_last_num_blocks_is_12_54:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_77
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_77

$L$_16_blocks_overflow_77:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_77:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vextracti32x4	xmm11,zmm4,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,3
	sub	r13,16 * (12 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_78





	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_78
; 12-block tail, last block shorter than 16 bytes (taken by the
; "cmp r13,16 / jl" above).  Records the partial length, saves the last
; output block, and hashes only the first 11 shuffled blocks; the result
; is left in xmm14.
; NOTE(review): rdx presumably points at the GCM context (hash-key table
; ending at rdx+352) and r8 at its partial-length field - confirm with the
; function prologue, which is outside this excerpt.
$L$_small_initial_partial_block_78:








	; [r8] = number of bytes in the partial block; [rdx+16] = last output block.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1-4 (zmm17) times the four table entries at rdx+176.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5-8 (zmm19) times the four table entries at rdx+240.
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Only three multipliers are loaded (rdx+304, +320, +336); lane 3 of zmm1
	; stays zero, so the partial 12th block in lane 3 of zmm20 contributes
	; nothing to the products below.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Merge with the sums carried in zmm24 (high), zmm25 (low) and zmm26
	; (middle).  vpternlogq with 0x96 is a three-way XOR.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the high and low halves, then XOR-fold
	; the four 128-bit lanes of each half down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant (defined
	; elsewhere in the file); xmm14 receives the reduced 128-bit value.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 12-block tail.  r13 is zero only when the full-block
; path ran "sub r13,16" on a complete last block; otherwise the shuffled
; partial block saved in xmm7 is XORed into the hash value in xmm14.
$L$_small_initial_compute_done_78:

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_78
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_78:
	jmp	NEAR $L$_last_blocks_done_54
; Tail case: 13 blocks remain.  Builds the byte mask for the fourth 64-byte
; vector and the 13 counter blocks (zmm0, zmm3, zmm4, xmm5).
$L$_last_num_blocks_is_13_54:
	; k1 = 8-byte table entry indexed by (r13 - 192), i.e. by the number of
	; bytes that belong to the vector at offset 192.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 243 = 256 - 13.  NOTE(review): r15 presumably mirrors the low counter
	; byte, so values >= 243 need the carry-safe path - confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_79
	; Fast path: derive the counters from zmm2 with the increments held in
	; zmm28 (first vector) and zmm27 (each following vector).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_79

; Carry-safe counter generation for the 13-block tail: shuffle zmm2 with the
; zmm29 byte mask, add the ddq_add_1234 / ddq_add_4444 constants as 32-bit
; lanes, then shuffle every result back.  Only the low 128 bits of the
; fourth vector (xmm5) are shuffled back because one block is used from it.
; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
$L$_16_blocks_overflow_79:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
; 13-block tail body.  Runs the AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/xmm5 (round keys broadcast from rcx+0 .. rcx+160: one
; initial XOR, nine vaesenc, one vaesenclast) while interleaving the
; carry-less multiplies of the four 64-byte vectors at rsp+768..rsp+1023 by
; the four vectors at rsp+rbx.  The input is then XORed with the keystream,
; stored, and - when the last block is complete - the 13 output blocks are
; hashed here as well.
; NOTE(review): the rsp+768.. vectors are presumably previously produced
; blocks awaiting GHASH and rsp+rbx.. the matching hash-key powers - confirm
; with the code that fills the frame (outside this excerpt).
$L$_16_blocks_ok_79:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running hash value (zmm14) into the first pending vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 13th counter block (lane 0 of zmm5), replicated to all four
	; lanes of zmm2.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial AddRoundKey with the key at rcx+0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Pending vector 1: high (0x11), low (0x00) and the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; AES round 1 (key rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Pending vector 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	; AES round 2 (key rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Pending vector 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round 3 (key rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the partial products of vectors 1-3.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round 4 (key rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11; the bytes at offset 192 are read under mask
	; k1 with zeroing, so nothing past the mask is touched.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	; AES round 5 (key rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Pending vector 4.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round 6 (key rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums of all four pending vectors:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = middle parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 (keys rcx+112, rcx+128, rcx+144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	; Final AES round (key rcx+160).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; xmm11 = last output block before masking (the partial-block path
	; stores it at rdx+16).
	vextracti32x4	xmm11,zmm5,0
	; r10 = value of the frame slot at rbp+120; all stores below go through
	; it (presumably the output pointer argument - TODO confirm).
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes of zmm5 that lie outside mask k1 before hashing.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output blocks with zmm29 for the hash computation.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	; xmm7 = shuffled last block, used later if that block is partial.
	vextracti32x4	xmm7,zmm21,0
	; r13 = number of bytes in the 13th block.
	sub	r13,16 * (13 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_80





	; Last block is complete: clear the stored partial length and hash all
	; 13 blocks with the 13 consecutive 16-byte table entries at rdx+144.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; 13th block times the single entry at rdx+336.  The 128-bit forms
	; clear the upper lanes of zmm0/zmm3/zmm4/zmm5.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_80
; 13-block tail, last block shorter than 16 bytes.  Records the partial
; length at [r8], saves the last output block at rdx+16 and hashes only the
; 12 complete blocks (zmm17, zmm19, zmm20) with the 12 table entries at
; rdx+160 .. rdx+351; the result is left in xmm14.
; NOTE(review): rdx is presumably the GCM context holding the hash-key
; table, r8 its partial-length field - confirm with the function prologue.
$L$_small_initial_partial_block_80:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1-4.
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5-8.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Blocks 9-12, accumulated with three-way XORs (imm 0x96).
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Merge with the pending sums: zmm26 (middle), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 13-block tail.  r13 is zero only when the full-block
; path ran "sub r13,16" on a complete last block; otherwise the shuffled
; partial block saved in xmm7 is XORed into the hash value in xmm14.
$L$_small_initial_compute_done_80:

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_80
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_80:
	jmp	NEAR $L$_last_blocks_done_54
; Tail case: 14 blocks remain.  Builds the byte mask for the fourth 64-byte
; vector and the 14 counter blocks (zmm0, zmm3, zmm4, ymm5).
$L$_last_num_blocks_is_14_54:
	; k1 = 8-byte table entry indexed by (r13 - 192), i.e. by the number of
	; bytes that belong to the vector at offset 192.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 242 = 256 - 14.  NOTE(review): r15 presumably mirrors the low counter
	; byte, so values >= 242 need the carry-safe path - confirm.
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_81
	; Fast path: derive the counters from zmm2 with the increments held in
	; zmm28 (first vector) and zmm27 (each following vector).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_81

; Carry-safe counter generation for the 14-block tail: shuffle zmm2 with the
; zmm29 byte mask, add the ddq_add_1234 / ddq_add_4444 constants as 32-bit
; lanes, then shuffle every result back.  Only the low 256 bits of the
; fourth vector (ymm5) are shuffled back because two blocks are used from it.
; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
$L$_16_blocks_overflow_81:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
; 14-block tail body.  Runs the AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/ymm5 (round keys broadcast from rcx+0 .. rcx+160: one
; initial XOR, nine vaesenc, one vaesenclast) while interleaving the
; carry-less multiplies of the four 64-byte vectors at rsp+768..rsp+1023 by
; the four vectors at rsp+rbx.  The input is then XORed with the keystream,
; stored, and - when the last block is complete - the 14 output blocks are
; hashed here as well.
; NOTE(review): the rsp+768.. vectors are presumably previously produced
; blocks awaiting GHASH and rsp+rbx.. the matching hash-key powers - confirm
; with the code that fills the frame (outside this excerpt).
$L$_16_blocks_ok_81:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running hash value (zmm14) into the first pending vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 14th counter block (lane 1 of zmm5), replicated to all four
	; lanes of zmm2.
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial AddRoundKey with the key at rcx+0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Pending vector 1: high (0x11), low (0x00) and the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; AES round 1 (key rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Pending vector 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	; AES round 2 (key rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Pending vector 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round 3 (key rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the partial products of vectors 1-3.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round 4 (key rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11; the bytes at offset 192 are read under mask
	; k1 with zeroing, so nothing past the mask is touched.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	; AES round 5 (key rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Pending vector 4.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round 6 (key rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums of all four pending vectors:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = middle parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 (keys rcx+112, rcx+128, rcx+144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	; Final AES round (key rcx+160).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	; xmm11 = last output block before masking (the partial-block path
	; stores it at rdx+16).
	vextracti32x4	xmm11,zmm5,1
	; r10 = value of the frame slot at rbp+120; all stores below go through
	; it (presumably the output pointer argument - TODO confirm).
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes of zmm5 that lie outside mask k1 before hashing.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output blocks with zmm29 for the hash computation.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	; xmm7 = shuffled last block, used later if that block is partial.
	vextracti32x4	xmm7,zmm21,1
	; r13 = number of bytes in the 14th block.
	sub	r13,16 * (14 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_82





	; Last block is complete: clear the stored partial length and hash all
	; 14 blocks with the 14 consecutive 16-byte table entries at rdx+128.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 times the two entries at rdx+320.  The 256-bit forms
	; clear the upper half of zmm0/zmm3/zmm4/zmm5.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_82
; 14-block tail, last block shorter than 16 bytes.  Records the partial
; length at [r8], saves the last output block at rdx+16 and hashes only the
; 13 complete blocks with the 13 table entries at rdx+144 .. rdx+351; the
; result is left in xmm14.
; NOTE(review): rdx is presumably the GCM context holding the hash-key
; table, r8 its partial-length field - confirm with the function prologue.
$L$_small_initial_partial_block_82:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1-4.
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5-8.
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Blocks 9-12, accumulated with three-way XORs (imm 0x96).
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Block 13 only (128-bit forms): the partial 14th block in lane 1 of
	; zmm21 is left out of the products.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 14-block tail.  r13 is zero only when the full-block
; path ran "sub r13,16" on a complete last block; otherwise the shuffled
; partial block saved in xmm7 is XORed into the hash value in xmm14.
$L$_small_initial_compute_done_82:

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_82
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_82:
	jmp	NEAR $L$_last_blocks_done_54
; Tail case: 15 blocks remain.  Builds the byte mask for the fourth 64-byte
; vector and the counter blocks (zmm0, zmm3, zmm4, zmm5).
$L$_last_num_blocks_is_15_54:
	; k1 = 8-byte table entry indexed by (r13 - 192), i.e. by the number of
	; bytes that belong to the vector at offset 192.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 241 = 256 - 15.  NOTE(review): r15 presumably mirrors the low counter
	; byte, so values >= 241 need the carry-safe path - confirm.
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_83
	; Fast path: derive the counters from zmm2 with the increments held in
	; zmm28 (first vector) and zmm27 (each following vector).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_83

; Carry-safe counter generation for the 15-block tail: shuffle zmm2 with the
; zmm29 byte mask, add the ddq_add_1234 / ddq_add_4444 constants as 32-bit
; lanes, then shuffle every result back.
; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
$L$_16_blocks_overflow_83:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; 15-block tail body.  Runs the AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/zmm5 (round keys broadcast from rcx+0 .. rcx+160: one
; initial XOR, nine vaesenc, one vaesenclast) while interleaving the
; carry-less multiplies of the four 64-byte vectors at rsp+768..rsp+1023 by
; the four vectors at rsp+rbx.  The input is then XORed with the keystream,
; stored, and - when the last block is complete - the 15 output blocks are
; hashed here as well.
; NOTE(review): the rsp+768.. vectors are presumably previously produced
; blocks awaiting GHASH and rsp+rbx.. the matching hash-key powers - confirm
; with the code that fills the frame (outside this excerpt).
$L$_16_blocks_ok_83:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running hash value (zmm14) into the first pending vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 15th counter block (lane 2 of zmm5), replicated to all four
	; lanes of zmm2.
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial AddRoundKey with the key at rcx+0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Pending vector 1: high (0x11), low (0x00) and the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; AES round 1 (key rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Pending vector 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	; AES round 2 (key rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Pending vector 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round 3 (key rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the partial products of vectors 1-3.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round 4 (key rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11; the bytes at offset 192 are read under mask
	; k1 with zeroing, so nothing past the mask is touched.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	; AES round 5 (key rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Pending vector 4.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round 6 (key rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums of all four pending vectors:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = middle parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 (keys rcx+112, rcx+128, rcx+144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Final AES round (key rcx+160).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = last output block before masking (the partial-block path
	; stores it at rdx+16).
	vextracti32x4	xmm11,zmm5,2
	; r10 = value of the frame slot at rbp+120; all stores below go through
	; it (presumably the output pointer argument - TODO confirm).
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 that lie outside mask k1 before hashing.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output blocks with zmm29 for the hash computation.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled last block, used later if that block is partial.
	vextracti32x4	xmm7,zmm21,2
	; r13 = number of bytes in the 15th block.
	sub	r13,16 * (15 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_84





	; Last block is complete: clear the stored partial length and hash all
	; 15 blocks with the 15 consecutive 16-byte table entries at rdx+112.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-15: three multipliers (rdx+304, +320, +336); lane 3 of zmm1
	; stays zero, matching the zeroed lane 3 of zmm21.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_84
; 15-block tail, last block shorter than 16 bytes.  Records the partial
; length at [r8], saves the last output block at rdx+16 and hashes only the
; 14 complete blocks with the 14 table entries at rdx+128 .. rdx+351; the
; result is left in xmm14.
; NOTE(review): rdx is presumably the GCM context holding the hash-key
; table, r8 its partial-length field - confirm with the function prologue.
$L$_small_initial_partial_block_84:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1-4.
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5-8.
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Blocks 9-12, accumulated with three-way XORs (imm 0x96).
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 only (256-bit forms): the partial 15th block in lane 2 of
	; zmm21 is left out of the products.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 15-block tail.  r13 is zero only when the full-block
; path ran "sub r13,16" on a complete last block; otherwise the shuffled
; partial block saved in xmm7 is XORed into the hash value in xmm14.
$L$_small_initial_compute_done_84:

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_84
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_84:
	jmp	NEAR $L$_last_blocks_done_54
; Tail case: 16 blocks remain.  Builds the byte mask for the fourth 64-byte
; vector and the counter blocks (zmm0, zmm3, zmm4, zmm5).
$L$_last_num_blocks_is_16_54:
	; k1 = 8-byte table entry indexed by (r13 - 192), i.e. by the number of
	; bytes that belong to the vector at offset 192.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 240 = 256 - 16.  NOTE(review): r15 presumably mirrors the low counter
	; byte, so values >= 240 need the carry-safe path - confirm.
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_85
	; Fast path: derive the counters from zmm2 with the increments held in
	; zmm28 (first vector) and zmm27 (each following vector).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_85

; Carry-safe counter generation for the 16-block tail: shuffle zmm2 with the
; zmm29 byte mask, add the ddq_add_1234 / ddq_add_4444 constants as 32-bit
; lanes, then shuffle every result back.
; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
$L$_16_blocks_overflow_85:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; 16-block tail body.  Runs the AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/zmm5 (round keys broadcast from rcx+0 .. rcx+160: one
; initial XOR, nine vaesenc, one vaesenclast) while interleaving the
; carry-less multiplies of the four 64-byte vectors at rsp+768..rsp+1023 by
; the four vectors at rsp+rbx.  The input is then XORed with the keystream
; and stored.  Unlike the shorter tails there is no "cmp r13,16" here:
; execution falls straight through into the code that follows this block.
; NOTE(review): the rsp+768.. vectors are presumably previously produced
; blocks awaiting GHASH and rsp+rbx.. the matching hash-key powers - confirm
; with the code that fills the frame (outside this excerpt).
$L$_16_blocks_ok_85:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running hash value (zmm14) into the first pending vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 16th counter block (lane 3 of zmm5), replicated to all four
	; lanes of zmm2.
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial AddRoundKey with the key at rcx+0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Pending vector 1: high (0x11), low (0x00) and the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; AES round 1 (key rcx+16).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Pending vector 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	; AES round 2 (key rcx+32).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Pending vector 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round 3 (key rcx+48).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the partial products of vectors 1-3.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round 4 (key rcx+64).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11; the bytes at offset 192 are read under mask
	; k1 with zeroing, so nothing past the mask is touched.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	; AES round 5 (key rcx+80).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Pending vector 4.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round 6 (key rcx+96).
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums of all four pending vectors:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = middle parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 (keys rcx+112, rcx+128, rcx+144).
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Final AES round (key rcx+160).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = last output block before masking (stored at rdx+16 by the code
	; that follows).
	vextracti32x4	xmm11,zmm5,3
	; r10 = value of the frame slot at rbp+120; all stores below go through
	; it (presumably the output pointer argument - TODO confirm).
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 that lie outside mask k1 before hashing.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output blocks with zmm29 for the hash computation.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled 16th block; r13 = number of bytes in the 16th block.
	vextracti32x4	xmm7,zmm21,3
	sub	r13,16 * (16 - 1)
; 16-block tail: entered by fall-through, with no length test, so the 16th
; block is always handled the way the shorter tails handle a partial block.
; Records r13 at [r8], saves the last output block at rdx+16 and hashes the
; first 15 shuffled blocks with the 15 table entries at rdx+112 .. rdx+351;
; the result is left in xmm14.
; NOTE(review): rdx is presumably the GCM context holding the hash-key
; table, r8 its partial-length field - confirm with the function prologue.
$L$_small_initial_partial_block_86:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1-4.
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5-8.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Blocks 9-12, accumulated with three-way XORs (imm 0x96).
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-15: three multipliers (rdx+304, +320, +336); lane 3 of zmm1
	; stays zero, so the 16th block in lane 3 of zmm21 contributes nothing.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Merge with the pending sums in zmm24 / zmm25 / zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Exit of the 16-block tail: the shuffled 16th block saved in xmm7 is
; XORed into the hash value in xmm14 unconditionally (there is no r13 test
; on this path), then control joins the shared tail exit.
$L$_small_initial_compute_done_86:
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_86:
	jmp	NEAR $L$_last_blocks_done_54
; Tail case: no blocks remain to encrypt.  Only the four pending 64-byte
; vectors at rsp+768..rsp+1023 are multiplied by the four vectors at
; rsp+rbx, summed, and reduced into xmm14; no AES rounds are executed.
; NOTE(review): the pending vectors are presumably earlier output blocks
; awaiting GHASH and rsp+rbx.. the matching hash-key powers - confirm with
; the code that fills the frame (outside this excerpt).
$L$_last_num_blocks_is_0_54:
	; Vector 1, with the running hash value (zmm14) folded in first.
	vmovdqa64	zmm13,ZMMWORD[768+rsp]
	vpxorq	zmm13,zmm13,zmm14
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Vector 2.
	vmovdqa64	zmm13,ZMMWORD[832+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; Start the sums: zmm24 = high, zmm25 = low, zmm26 = middle
	; (vpternlogq with 0x96 is a three-way XOR).
	vpxorq	zmm26,zmm4,zmm10
	vpxorq	zmm24,zmm0,zmm6
	vpxorq	zmm25,zmm3,zmm7
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Vector 3.
	vmovdqa64	zmm13,ZMMWORD[896+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Vector 4.
	vmovdqa64	zmm13,ZMMWORD[960+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	; Add the products of vectors 3 and 4 to the sums.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle terms across the halves and XOR-fold the four lanes
	; down to xmm24 (high) and xmm25 (low).
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Common exit of the "_54" tail handlers: byte-shuffle xmm2 with the mask in
; xmm29, then continue at the shared $L$_ghash_done_10 label.
; NOTE(review): xmm2 is presumably the counter block and xmm29 a byte-swap
; mask - confirm where both are initialised.
$L$_last_blocks_done_54:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_10
; Produce the next 16 counter blocks in zmm0, zmm3, zmm4 and zmm5 (four
; 128-bit lanes each) from the current counter in zmm2.
; Fast path (r15b below 240): add zmm28 once and zmm27 three more times
; directly. Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle all four results back.
; NOTE(review): r15b presumably mirrors the low counter byte, so 240 is the
; point where adding 16 would carry out of that byte; zmm28 / zmm27 are
; presumably the pre-shuffled forms of the two add constants - confirm.
$L$_encrypt_16_blocks_10:
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_87
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_87
$L$_16_blocks_overflow_87:
	; note: zmm2 is left in shuffled form on this path
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	; zmm5 is both the addend and the destination here; last use of the constant
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; Encrypt the 16 counter blocks held in zmm0, zmm3, zmm4, zmm5 and, interleaved
; with the AES rounds, carry-less multiply previously stored data.
;   rcx          base of the round keys; offsets 0..160 are used (initial XOR,
;                nine vaesenc rounds, vaesenclast)
;   r9 + r11     input address, r10 + r11 output address (r10 is loaded from
;                120+rbp)
;   768..960+rsp multiplied by 0..192+rsp while the AES rounds run
;   1024..1216+rsp multiplied by 256..448+rsp afterwards
;   1280..1472+rsp receive the byte-shuffled output of this iteration
;   zmm24 / zmm25 / zmm26 leave this block holding the high / low / cross-term
;                sums of all eight products; no reduction is done here
; NOTE(review): presumably the data slots are byte-reflected ciphertext and
; the 0..448+rsp slots hash-key powers - confirm against the frame setup.
$L$_16_blocks_ok_87:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; fold the running value in zmm14 into the first stored data vector
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rsp]




	; imm 255 selects lane 3 for every destination lane: zmm2 becomes four
	; copies of the last counter block just generated
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	; initial XOR with the key at rcx+0
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	; first product: (zmm14 ^ [768+rsp]) * [rsp]
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	; AES round with the key at rcx+16; zmm30 and zmm31 alternate as the
	; register holding the next round key
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	; second product: [832+rsp] * [64+rsp]
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	; third product: [896+rsp] * [128+rsp]
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	; load the 256 input bytes; zmm17 / zmm19 / zmm20 / zmm21 are free again
	; because their products were consumed just above
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	; fourth product: [960+rsp] * [192+rsp]
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; finish the sums: zmm26 = cross terms, zmm24 = high, zmm25 = low
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; final round with the key at rcx+160
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	; XOR the encrypted counters with the input data
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	; r10 = output base, read from the frame at 120+rbp
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	; keep a byte-shuffled copy of the output at 1280..1472+rsp
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	; second group of four products: [1024..1216+rsp] * [256..448+rsp],
	; accumulated into the same three sums
	vmovdqa64	zmm13,ZMMWORD[1024+rsp]
	vmovdqu64	zmm12,ZMMWORD[256+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1088+rsp]
	vmovdqu64	zmm12,ZMMWORD[320+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[1152+rsp]
	vmovdqu64	zmm12,ZMMWORD[384+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1216+rsp]
	vmovdqu64	zmm12,ZMMWORD[448+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Account for the 256 bytes just processed (r13 = bytes left, r11 = byte
	; offset), then compute r10d = (r13d + 15) / 16, i.e. the number of
	; 16-byte blocks still to do with a partial block counted as one, and
	; branch to the matching $L$_last_num_blocks_is_N_88 handler through a
	; small compare tree. A count of 1 falls through to the handler that
	; follows the last compare.
	sub	r13,256
	add	r11,256
	mov	r10d,r13d
	add	r10d,15
	; shr sets ZF when the result is zero; the je below relies on that
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_88

	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_88
	jb	NEAR $L$_last_num_blocks_is_7_1_88


	; r10d is 9 or more here
	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_88
	jb	NEAR $L$_last_num_blocks_is_11_9_88


	; r10d is 13 or more here
	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_88
	ja	NEAR $L$_last_num_blocks_is_16_88
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_88
	jmp	NEAR $L$_last_num_blocks_is_13_88

$L$_last_num_blocks_is_11_9_88:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_88
	ja	NEAR $L$_last_num_blocks_is_11_88
	jmp	NEAR $L$_last_num_blocks_is_9_88

$L$_last_num_blocks_is_7_1_88:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_88
	jb	NEAR $L$_last_num_blocks_is_3_1_88

	; r10d is 5, 6 or 7 here
	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_88
	je	NEAR $L$_last_num_blocks_is_6_88
	jmp	NEAR $L$_last_num_blocks_is_5_88

$L$_last_num_blocks_is_3_1_88:

	; r10d is 1, 2 or 3 here; 1 takes neither branch and falls through
	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_88
	je	NEAR $L$_last_num_blocks_is_2_88
; Tail handler: one final block remains (it may be shorter than 16 bytes).
; Encrypts one counter block while finishing the multiplies over the data
; stored at 1280..1472+rsp, then folds the new block into the hash.
; Register use visible in this block:
;   r13   remaining byte count; also the index into byte64_len_to_mask_table
;   r11   byte offset added to the input base r9 and to the output base
;         (loaded from 120+rbp into r10)
;   rcx   base of the round keys (offsets 0..160)
;   rdx   context: multiplier read from 336+rdx, last output block saved to
;         16+rdx on the partial path
;   r8    pointer through which the partial-block byte count is stored
;   zmm2  counter, zmm29 vpshufb mask, zmm28 increment added on the fast path
;   zmm24 / zmm25 / zmm26  carried-in high / low / cross-term sums
;   xmm14 hash value on exit
; NOTE(review): k1 presumably has one bit set per remaining byte, and r15d
; presumably mirrors the low counter byte - confirm against the table and
; the code that maintains r15.
$L$_last_num_blocks_is_1_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_89
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_89

$L$_16_blocks_overflow_89:
	; slow path: shuffle the counter, add ddq_add_1234, shuffle back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
$L$_16_blocks_ok_89:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; copy the last counter block used (lane 0) into all four lanes of zmm2
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; initial XOR with the key at rcx+0
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; products of [1280..1472+rsp] with [512..704+rsp], interleaved with the
	; AES rounds; imm 0x11 = high, 0x00 = low, 0x01 / 0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zeroing load of the remaining input bytes
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; add the fourth product and the carried-in sums zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; split the cross-term sum into halves for the high and low sums
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	xmm0,xmm0,xmm31
	; XOR the four 128-bit lanes of each sum together
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; two-phase reduction with POLY2; the reduced value ends up in xmm14
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; final AES round, then XOR with the input
	vaesenclast	xmm0,xmm0,xmm30
	vpxorq	xmm0,xmm0,xmm17
	; xmm11 = output block before the bytes beyond the mask are cleared
	vextracti32x4	xmm11,zmm0,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; zero the bytes outside k1 so they do not enter the hash
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	; xmm7 = byte-shuffled last block, used by the partial path
	vextracti32x4	xmm7,zmm17,0


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_90





	; full 16-byte block: record "no partial block" and multiply
	; (block ^ hash) by the value at 336+rdx, then reduce into xmm14
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_90
$L$_small_initial_partial_block_90:








	; partial block: store its byte count through r8, save the output block
	; at 16+rdx, and only XOR the masked block into the hash (no multiply)
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11











	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_90
$L$_small_initial_compute_done_90:
$L$_after_reduction_90:
	jmp	NEAR $L$_last_blocks_done_88
; Tail handler: two final blocks remain (the second may be partial).
; Encrypts two counter blocks in ymm0 while finishing the multiplies over the
; data stored at 1280..1472+rsp, then folds the new blocks into the hash.
; Register use visible in this block:
;   r13   remaining byte count; also the index into byte64_len_to_mask_table
;   r11   byte offset added to the input base r9 and to the output base
;         (loaded from 120+rbp into r10)
;   rcx   base of the round keys (offsets 0..160)
;   rdx   context: multipliers read from 320+rdx / 336+rdx, last output
;         block saved to 16+rdx on the partial path
;   r8    pointer through which the partial-block byte count is stored
;   zmm2  counter, zmm29 vpshufb mask, zmm28 increment added on the fast path
;   zmm24 / zmm25 / zmm26  carried-in high / low / cross-term sums
;   xmm14 hash value on exit
; NOTE(review): k1 presumably has one bit set per remaining byte, and r15d
; presumably mirrors the low counter byte - confirm.
$L$_last_num_blocks_is_2_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_91
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_91

$L$_16_blocks_overflow_91:
	; slow path: shuffle the counter, add ddq_add_1234, shuffle back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_91:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; copy the last counter block used (lane 1) into all four lanes of zmm2
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; initial XOR with the key at rcx+0
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; products of [1280..1472+rsp] with [512..704+rsp], interleaved with the
	; AES rounds; imm 0x11 = high, 0x00 = low, 0x01 / 0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zeroing load of the remaining input bytes
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; add the fourth product and the carried-in sums zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; split the cross-term sum into halves for the high and low sums
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	ymm0,ymm0,ymm31
	; XOR the four 128-bit lanes of each sum together
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; two-phase reduction with POLY2; the reduced value ends up in xmm14
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; final AES round, then XOR with the input
	vaesenclast	ymm0,ymm0,ymm30
	vpxorq	ymm0,ymm0,ymm17
	; xmm11 = last output block before the bytes beyond the mask are cleared
	vextracti32x4	xmm11,zmm0,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; zero the bytes outside k1 so they do not enter the hash
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	; xmm7 = byte-shuffled last block, used after the partial path
	vextracti32x4	xmm7,zmm17,1
	; r13 = bytes belonging to the last block only
	sub	r13,16 * (2 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_92





	; last block is full: record "no partial block" and multiply both blocks
	; (first one XORed with the hash) by the 32 bytes at 320+rdx
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_92
$L$_small_initial_partial_block_92:








	; last block is partial: store its byte count through r8, save the output
	; block at 16+rdx, and multiply only the first block by the 16 bytes at
	; 336+rdx; the partial block is XORed in after the reduction
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_92:

	; r13 is zero after the full path; otherwise XOR the partial block in
	or	r13,r13
	je	NEAR $L$_after_reduction_92
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_92:
	jmp	NEAR $L$_last_blocks_done_88
; Tail handler: three final blocks remain (the third may be partial).
; Encrypts the counter blocks in zmm0 while finishing the multiplies over the
; data stored at 1280..1472+rsp, then folds the new blocks into the hash.
; Register use visible in this block:
;   r13   remaining byte count; also the index into byte64_len_to_mask_table
;   r11   byte offset added to the input base r9 and to the output base
;         (loaded from 120+rbp into r10)
;   rcx   base of the round keys (offsets 0..160)
;   rdx   context: multipliers read from 304+rdx .. 336+rdx, last output
;         block saved to 16+rdx on the partial path
;   r8    pointer through which the partial-block byte count is stored
;   zmm2  counter, zmm29 vpshufb mask, zmm28 increment added on the fast path
;   zmm24 / zmm25 / zmm26  carried-in high / low / cross-term sums
;   xmm14 hash value on exit
; NOTE(review): k1 presumably has one bit set per remaining byte, and r15d
; presumably mirrors the low counter byte - confirm.
$L$_last_num_blocks_is_3_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_93
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_93

$L$_16_blocks_overflow_93:
	; slow path: shuffle the counter, add ddq_add_1234, shuffle back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_93:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; copy the last counter block used (lane 2) into all four lanes of zmm2
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; initial XOR with the key at rcx+0
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; products of [1280..1472+rsp] with [512..704+rsp], interleaved with the
	; AES rounds; imm 0x11 = high, 0x00 = low, 0x01 / 0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zeroing load of the remaining input bytes
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; add the fourth product and the carried-in sums zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; split the cross-term sum into halves for the high and low sums
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	; XOR the four 128-bit lanes of each sum together
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; two-phase reduction with POLY2; the reduced value ends up in xmm14
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; final AES round, then XOR with the input
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block before the bytes beyond the mask are cleared
	vextracti32x4	xmm11,zmm0,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; zero the bytes outside k1 so they do not enter the hash
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = byte-shuffled last block, used after the partial path
	vextracti32x4	xmm7,zmm17,2
	; r13 = bytes belonging to the last block only
	sub	r13,16 * (3 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_94





	; last block is full: record "no partial block" and multiply all three
	; blocks (first one XORed with the hash) by the 48 bytes gathered from
	; 304+rdx (32 bytes) and 336+rdx (16 bytes)
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_94
$L$_small_initial_partial_block_94:








	; last block is partial: store its byte count through r8, save the output
	; block at 16+rdx, and multiply only the first two blocks by the 32 bytes
	; at 320+rdx; the partial block is XORed in after the reduction
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_94:

	; r13 is zero after the full path; otherwise XOR the partial block in
	or	r13,r13
	je	NEAR $L$_after_reduction_94
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_94:
	jmp	NEAR $L$_last_blocks_done_88
; Tail handler: four final blocks remain (the fourth may be partial).
; Encrypts the counter blocks in zmm0 while finishing the multiplies over the
; data stored at 1280..1472+rsp, then folds the new blocks into the hash.
; Register use visible in this block:
;   r13   remaining byte count; also the index into byte64_len_to_mask_table
;   r11   byte offset added to the input base r9 and to the output base
;         (loaded from 120+rbp into r10)
;   rcx   base of the round keys (offsets 0..160)
;   rdx   context: multipliers read from 288+rdx .. 336+rdx, last output
;         block saved to 16+rdx on the partial path
;   r8    pointer through which the partial-block byte count is stored
;   zmm2  counter, zmm29 vpshufb mask, zmm28 increment added on the fast path
;   zmm24 / zmm25 / zmm26  carried-in high / low / cross-term sums
;   xmm14 hash value on exit
; NOTE(review): k1 presumably has one bit set per remaining byte, and r15d
; presumably mirrors the low counter byte - confirm.
$L$_last_num_blocks_is_4_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_95
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_95

$L$_16_blocks_overflow_95:
	; slow path: shuffle the counter, add ddq_add_1234, shuffle back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_95:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; copy the last counter block used (lane 3) into all four lanes of zmm2
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; initial XOR with the key at rcx+0
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; products of [1280..1472+rsp] with [512..704+rsp], interleaved with the
	; AES rounds; imm 0x11 = high, 0x00 = low, 0x01 / 0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zeroing load of the remaining input bytes
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; add the fourth product and the carried-in sums zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; split the cross-term sum into halves for the high and low sums
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	; XOR the four 128-bit lanes of each sum together
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; two-phase reduction with POLY2; the reduced value ends up in xmm14
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; final AES round, then XOR with the input
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block before the bytes beyond the mask are cleared
	vextracti32x4	xmm11,zmm0,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; zero the bytes outside k1 so they do not enter the hash
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = byte-shuffled last block, used after the partial path
	vextracti32x4	xmm7,zmm17,3
	; r13 = bytes belonging to the last block only
	sub	r13,16 * (4 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_96





	; last block is full: record "no partial block" and multiply all four
	; blocks (first one XORed with the hash) by the 64 bytes at 288+rdx
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_96
$L$_small_initial_partial_block_96:








	; last block is partial: store its byte count through r8, save the output
	; block at 16+rdx, and multiply only the first three blocks by the 48
	; bytes gathered from 304+rdx and 336+rdx; the partial block is XORed in
	; after the reduction
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_96:

	; r13 is zero after the full path; otherwise XOR the partial block in
	or	r13,r13
	je	NEAR $L$_after_reduction_96
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_96:
	jmp	NEAR $L$_last_blocks_done_88
; Tail handler: five final blocks remain (the fifth may be partial).
; Blocks 1-4 travel in zmm0 and are loaded / stored unmasked; block 5 travels
; in xmm3 and is loaded / stored under mask k1. The AES rounds are interleaved
; with the multiplies over the data stored at 1280..1472+rsp.
; Register use visible in this block:
;   r13   remaining byte count; r13 - 64 indexes byte64_len_to_mask_table
;         because the first 64 bytes need no mask
;   r11   byte offset added to the input base r9 and to the output base
;         (loaded from 120+rbp into r10)
;   rcx   base of the round keys (offsets 0..160)
;   rdx   context: multipliers read from 272+rdx .. 336+rdx, last output
;         block saved to 16+rdx on the partial path
;   r8    pointer through which the partial-block byte count is stored
;   zmm2  counter, zmm29 vpshufb mask, zmm28 / zmm27 fast-path increments
;   zmm24 / zmm25 / zmm26  carried-in high / low / cross-term sums
;   xmm14 hash value on exit
; NOTE(review): k1 presumably has one bit set per remaining byte, and r15d
; presumably mirrors the low counter byte - confirm.
$L$_last_num_blocks_is_5_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_97
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_97

$L$_16_blocks_overflow_97:
	; slow path: shuffle the counter, add ddq_add_1234 and ddq_add_4444,
	; shuffle both results back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_97:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; copy the last counter block used (lane 0 of zmm3) into all lanes of zmm2
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; initial XOR with the key at rcx+0
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; products of [1280..1472+rsp] with [512..704+rsp], interleaved with the
	; AES rounds; imm 0x11 = high, 0x00 = low, 0x01 / 0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; three-input XORs (imm 0x96) summing the first three products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; first 64 input bytes unmasked, the remainder under k1 with zeroing
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; add the fourth product and the carried-in sums zmm24 / zmm25 / zmm26
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; split the cross-term sum into halves for the high and low sums
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	; XOR the four 128-bit lanes of each sum together
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; two-phase reduction with POLY2; the reduced value ends up in xmm14
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; final AES round, then XOR with the input
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	; xmm11 = last output block before the bytes beyond the mask are cleared
	vextracti32x4	xmm11,zmm3,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; zero the bytes outside k1 so they do not enter the hash
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	; xmm7 = byte-shuffled last block, used after the partial path
	vextracti32x4	xmm7,zmm19,0
	; r13 = bytes belonging to the last block only
	sub	r13,16 * (5 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_98





	; last block is full: record "no partial block"; multiply blocks 1-4
	; (first one XORed with the hash) by the 64 bytes at 272+rdx and block 5
	; by the 16 bytes at 336+rdx, then sum and reduce
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_98
$L$_small_initial_partial_block_98:








	; last block is partial: store its byte count through r8, save the output
	; block at 16+rdx, and multiply only blocks 1-4 by the 64 bytes at
	; 288+rdx; the partial block is XORed in after the reduction
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_98:

	; r13 is zero after the full path; otherwise XOR the partial block in
	or	r13,r13
	je	NEAR $L$_after_reduction_98
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_98:
	jmp	NEAR $L$_last_blocks_done_88
$L$_last_num_blocks_is_6_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_99
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_99

$L$_16_blocks_overflow_99:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_99:
; Runs the AES rounds over the 6 counter blocks (zmm0 = 4, ymm3 = 2) while
; interleaving carry-less multiplies of data staged at [1280..1472+rsp] with
; data at [512..704+rsp]. Round keys come from [rcx + 16*i], i = 0..10, and are
; broadcast to every 128-bit lane, alternating between zmm30 and zmm31.
; NOTE(review): the stack data is presumably earlier blocks awaiting GHASH and
; the matching hash-key powers -- confirm against the code that fills it.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = last counter block (lane 1 of zmm3) replicated into all four lanes.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for the first stack chunk: 0x11 = high*high, 0x00 = low*low,
	; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for the second stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for the third stack chunk.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate high (zmm14), low (zmm7) and the
	; cross terms (zmm11, zmm10) of the first three chunks.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 full bytes at [r9+r11], then the k1-masked remainder
	; (masked-off bytes are zeroed).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for the fourth stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge all cross terms into zmm10 and fold in zmm24 / zmm25 / zmm26.
	; NOTE(review): zmm24-zmm26 are not written here; presumably they carry
	; partial sums from earlier in the function -- confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the cross-term sum into its upper and lower 8 bytes per lane.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	; XOR the four 128-bit lanes together: high half -> xmm14, low half -> xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Two-step fold of xmm7 using the POLY2 constant; the result is combined
	; into xmm14 (presumably the GHASH polynomial reduction -- confirm).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final round with the key at [160+rcx], then XOR the keystream with input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	; xmm11 = last output block, captured before the tail bytes are masked off.
	vextracti32x4	xmm11,zmm3,1
	; Output base pointer is reloaded from the frame at [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
	; Zero the bytes of zmm3 that lie outside the k1 mask.
	vmovdqu8	zmm3{k1}{z},zmm3
	; Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	vextracti32x4	xmm7,zmm19,1
	; r13 = bytes belonging to the 6th (last) block.
	sub	r13,16 * (6 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_100





	; Full last block: consume its 16 bytes and write 0 to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the reduced value in xmm14 into the first block, then multiply all
	; 6 blocks by the key material at [256+rdx] (4 lanes) and [320+rdx]
	; (2 lanes). NOTE(review): these look like precomputed hash-key powers.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; Sum the matching partial products of both groups.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them across the high (zmm0) and
	; low (zmm3) accumulators.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes of each accumulator down to a single 128-bit value.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First fold of the low half with POLY2.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second fold; xmm14 = folded low half XOR high half.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_100
$L$_small_initial_partial_block_100:








	; Partial last block: record the leftover byte count at [r8] and save the
	; last output block (xmm11) at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the 5 complete blocks; the key offsets are 16 bytes higher
	; than in the full-block path ([272+rdx] and [336+rdx]).
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Lane reduction and POLY2 folding, same sequence as the full-block path.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_100:

	; If bytes remain in a partial block (r13 != 0), XOR the byte-shuffled
	; partial block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_100
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_100:
	jmp	NEAR $L$_last_blocks_done_88
; Tail case for 7 blocks: 4 counter blocks in zmm0 plus 3 in zmm3.
$L$_last_num_blocks_is_7_88:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 64]; it is used
	; further down as the byte mask for the second 64-byte chunk.
	; NOTE(review): r13 looks like the remaining byte count -- confirm.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; Threshold is 256 - 7. NOTE(review): r15d presumably tracks the low counter
	; byte, so >= 249 means 7 increments would carry out of it -- confirm.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_101
	; Fast path: build the counter blocks by adding zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_101

$L$_16_blocks_overflow_101:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_101:
; Runs the AES rounds over the 7 counter blocks (zmm0 = 4, zmm3 = 3) while
; interleaving carry-less multiplies of data staged at [1280..1472+rsp] with
; data at [512..704+rsp]. Round keys come from [rcx + 16*i], i = 0..10, and are
; broadcast to every 128-bit lane, alternating between zmm30 and zmm31.
; NOTE(review): the stack data is presumably earlier blocks awaiting GHASH and
; the matching hash-key powers -- confirm against the code that fills it.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = last counter block (lane 2 of zmm3) replicated into all four lanes.
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for the first stack chunk: 0x11 = high*high, 0x00 = low*low,
	; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for the second stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for the third stack chunk.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate high (zmm14), low (zmm7) and the
	; cross terms (zmm11, zmm10) of the first three chunks.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 full bytes at [r9+r11], then the k1-masked remainder
	; (masked-off bytes are zeroed).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for the fourth stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge all cross terms into zmm10 and fold in zmm24 / zmm25 / zmm26.
	; NOTE(review): zmm24-zmm26 are not written here; presumably they carry
	; partial sums from earlier in the function -- confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the cross-term sum into its upper and lower 8 bytes per lane.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: high half -> xmm14, low half -> xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Two-step fold of xmm7 using the POLY2 constant; the result is combined
	; into xmm14 (presumably the GHASH polynomial reduction -- confirm).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final round with the key at [160+rcx], then XOR the keystream with input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block, captured before the tail bytes are masked off.
	vextracti32x4	xmm11,zmm3,2
	; Output base pointer is reloaded from the frame at [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes of zmm3 that lie outside the k1 mask.
	vmovdqu8	zmm3{k1}{z},zmm3
	; Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,2
	; r13 = bytes belonging to the 7th (last) block.
	sub	r13,16 * (7 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_102





	; Full last block: consume its 16 bytes and write 0 to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the reduced value in xmm14 into the first block, then multiply all
	; 7 blocks by the key material at [240+rdx] (4 lanes) and at
	; [304+rdx] / [336+rdx] (3 lanes).
	; NOTE(review): these look like precomputed hash-key powers -- confirm.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; The ymm load clears the upper half of zmm1; lane 2 is then filled from
	; [336+rdx], leaving lane 3 zero.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; Sum the matching partial products of both groups.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them across the high (zmm0) and
	; low (zmm3) accumulators.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes of each accumulator down to a single 128-bit value.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First fold of the low half with POLY2.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second fold; xmm14 = folded low half XOR high half.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_102
$L$_small_initial_partial_block_102:








	; Partial last block: record the leftover byte count at [r8] and save the
	; last output block (xmm11) at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the 6 complete blocks; the key offsets are 16 bytes higher
	; than in the full-block path ([256+rdx] and [320+rdx]).
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Lane reduction and POLY2 folding, same sequence as the full-block path.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_102:

	; If bytes remain in a partial block (r13 != 0), XOR the byte-shuffled
	; partial block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_102
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_102:
	jmp	NEAR $L$_last_blocks_done_88
; Tail case for 8 blocks: 4 counter blocks in zmm0 plus 4 in zmm3.
$L$_last_num_blocks_is_8_88:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 64]; it is used
	; further down as the byte mask for the second 64-byte chunk.
	; NOTE(review): r13 looks like the remaining byte count -- confirm.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; Threshold is 256 - 8. NOTE(review): r15d presumably tracks the low counter
	; byte, so >= 248 means 8 increments would carry out of it -- confirm.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_103
	; Fast path: build the counter blocks by adding zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_103

$L$_16_blocks_overflow_103:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_103:
; Runs the AES rounds over the 8 counter blocks (zmm0 = 4, zmm3 = 4) while
; interleaving carry-less multiplies of data staged at [1280..1472+rsp] with
; data at [512..704+rsp]. Round keys come from [rcx + 16*i], i = 0..10, and are
; broadcast to every 128-bit lane, alternating between zmm30 and zmm31.
; NOTE(review): the stack data is presumably earlier blocks awaiting GHASH and
; the matching hash-key powers -- confirm against the code that fills it.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = last counter block (lane 3 of zmm3) replicated into all four lanes.
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for the first stack chunk: 0x11 = high*high, 0x00 = low*low,
	; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for the second stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for the third stack chunk.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate high (zmm14), low (zmm7) and the
	; cross terms (zmm11, zmm10) of the first three chunks.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 full bytes at [r9+r11], then the k1-masked remainder
	; (masked-off bytes are zeroed).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for the fourth stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge all cross terms into zmm10 and fold in zmm24 / zmm25 / zmm26.
	; NOTE(review): zmm24-zmm26 are not written here; presumably they carry
	; partial sums from earlier in the function -- confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the cross-term sum into its upper and lower 8 bytes per lane.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: high half -> xmm14, low half -> xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Two-step fold of xmm7 using the POLY2 constant; the result is combined
	; into xmm14 (presumably the GHASH polynomial reduction -- confirm).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final round with the key at [160+rcx], then XOR the keystream with input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block, captured before the tail bytes are masked off.
	vextracti32x4	xmm11,zmm3,3
	; Output base pointer is reloaded from the frame at [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes of zmm3 that lie outside the k1 mask.
	vmovdqu8	zmm3{k1}{z},zmm3
	; Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,3
	; r13 = bytes belonging to the 8th (last) block.
	sub	r13,16 * (8 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_104





	; Full last block: consume its 16 bytes and write 0 to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the reduced value in xmm14 into the first block, then multiply all
	; 8 blocks by the key material at [224+rdx] and [288+rdx] (4 lanes each).
	; NOTE(review): these look like precomputed hash-key powers -- confirm.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the matching partial products of both groups.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Combine the cross terms and split them across the high (zmm0) and
	; low (zmm3) accumulators.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four lanes of each accumulator down to a single 128-bit value.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First fold of the low half with POLY2.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second fold; xmm14 = folded low half XOR high half.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_104
$L$_small_initial_partial_block_104:








	; Partial last block: record the leftover byte count at [r8] and save the
	; last output block (xmm11) at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the 7 complete blocks; the key offsets are 16 bytes higher
	; than in the full-block path ([240+rdx], then [304+rdx] / [336+rdx]).
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; The ymm load clears the upper half of zmm1; lane 2 is then filled from
	; [336+rdx], leaving lane 3 zero so the partial block contributes nothing.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Lane reduction and POLY2 folding, same sequence as the full-block path.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_104:

	; If bytes remain in a partial block (r13 != 0), XOR the byte-shuffled
	; partial block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_104
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_104:
	jmp	NEAR $L$_last_blocks_done_88
; Tail case for 9 blocks: 4 counter blocks in zmm0, 4 in zmm3 and 1 in xmm4.
$L$_last_num_blocks_is_9_88:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]; it is used
	; further down as the byte mask for the third 64-byte chunk.
	; NOTE(review): r13 looks like the remaining byte count -- confirm.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; Threshold is 256 - 9. NOTE(review): r15d presumably tracks the low counter
	; byte, so >= 247 means 9 increments would carry out of it -- confirm.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_105
	; Fast path: build the counter blocks by adding zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_105

$L$_16_blocks_overflow_105:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_105:
; Runs the AES rounds over the 9 counter blocks (zmm0 = 4, zmm3 = 4, xmm4 = 1)
; while interleaving carry-less multiplies of data staged at [1280..1472+rsp]
; with data at [512..704+rsp]. Round keys come from [rcx + 16*i], i = 0..10,
; and are broadcast to every 128-bit lane, alternating between zmm30 and zmm31.
; NOTE(review): the stack data is presumably earlier blocks awaiting GHASH and
; the matching hash-key powers -- confirm against the code that fills it.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = last counter block (lane 0 of zmm4) replicated into all four lanes.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for the first stack chunk: 0x11 = high*high, 0x00 = low*low,
	; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for the second stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for the third stack chunk.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate high (zmm14), low (zmm7) and the
	; cross terms (zmm11, zmm10) of the first three chunks.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 full bytes at [r9+r11], then the k1-masked remainder
	; (masked-off bytes are zeroed).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for the fourth stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge all cross terms into zmm10 and fold in zmm24 / zmm25 / zmm26.
	; NOTE(review): zmm24-zmm26 are not written here; presumably they carry
	; partial sums from earlier in the function -- confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the cross-term sum into its upper and lower 8 bytes per lane.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	; XOR the four 128-bit lanes together: high half -> xmm14, low half -> xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Two-step fold of xmm7 using the POLY2 constant; the result is combined
	; into xmm14 (presumably the GHASH polynomial reduction -- confirm).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final round with the key at [160+rcx], then XOR the keystream with input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
	; xmm11 = last output block, captured before the tail bytes are masked off.
	vextracti32x4	xmm11,zmm4,0
	; Output base pointer is reloaded from the frame at [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
	; Zero the bytes of zmm4 that lie outside the k1 mask.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	vextracti32x4	xmm7,zmm20,0
	; r13 = bytes belonging to the 9th (last) block.
	sub	r13,16 * (9 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_106





	; Full last block: consume its 16 bytes and write 0 to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the reduced value in xmm14 into the first block, then multiply all
	; 9 blocks by the key material at [208+rdx], [272+rdx] (4 lanes each) and
	; [336+rdx] (1 lane).
	; NOTE(review): these look like precomputed hash-key powers -- confirm.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the partial products of the first two groups.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the third group's products to the running sums.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them across the high (zmm0) and
	; low (zmm3) accumulators.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes of each accumulator down to a single 128-bit value.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First fold of the low half with POLY2.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second fold; xmm14 = folded low half XOR high half.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_106
$L$_small_initial_partial_block_106:








	; Partial last block: record the leftover byte count at [r8] and save the
	; last output block (xmm11) at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the 8 complete blocks; the key offsets are 16 bytes higher
	; than in the full-block path ([224+rdx] and [288+rdx]).
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; Lane reduction and POLY2 folding, same sequence as the full-block path.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_106:

	; If bytes remain in a partial block (r13 != 0), XOR the byte-shuffled
	; partial block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_106
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_106:
	jmp	NEAR $L$_last_blocks_done_88
; Tail case for 10 blocks: 4 counter blocks in zmm0, 4 in zmm3 and 2 in ymm4.
$L$_last_num_blocks_is_10_88:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]; it is used
	; further down as the byte mask for the third 64-byte chunk.
	; NOTE(review): r13 looks like the remaining byte count -- confirm.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; Threshold is 256 - 10. NOTE(review): r15d presumably tracks the low counter
	; byte, so >= 246 means 10 increments would carry out of it -- confirm.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_107
	; Fast path: build the counter blocks by adding zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_107

$L$_16_blocks_overflow_107:
	; Slow path: byte-shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle the results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_107:
; Runs the AES rounds over the 10 counter blocks (zmm0 = 4, zmm3 = 4, ymm4 = 2)
; while interleaving carry-less multiplies of data staged at [1280..1472+rsp]
; with data at [512..704+rsp]. Round keys come from [rcx + 16*i], i = 0..10,
; and are broadcast to every 128-bit lane, alternating between zmm30 and zmm31.
; NOTE(review): the stack data is presumably earlier blocks awaiting GHASH and
; the matching hash-key powers -- confirm against the code that fills it.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = last counter block (lane 1 of zmm4) replicated into all four lanes.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for the first stack chunk: 0x11 = high*high, 0x00 = low*low,
	; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for the second stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for the third stack chunk.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate high (zmm14), low (zmm7) and the
	; cross terms (zmm11, zmm10) of the first three chunks.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 full bytes at [r9+r11], then the k1-masked remainder
	; (masked-off bytes are zeroed).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for the fourth stack chunk.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge all cross terms into zmm10 and fold in zmm24 / zmm25 / zmm26.
	; NOTE(review): zmm24-zmm26 are not written here; presumably they carry
	; partial sums from earlier in the function -- confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the cross-term sum into its upper and lower 8 bytes per lane.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	; XOR the four 128-bit lanes together: high half -> xmm14, low half -> xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Two-step fold of xmm7 using the POLY2 constant; the result is combined
	; into xmm14 (presumably the GHASH polynomial reduction -- confirm).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final round with the key at [160+rcx], then XOR the keystream with input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
	; xmm11 = last output block, captured before the tail bytes are masked off.
	vextracti32x4	xmm11,zmm4,1
	; Output base pointer is reloaded from the frame at [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
	; Zero the bytes of zmm4 that lie outside the k1 mask.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	vextracti32x4	xmm7,zmm20,1
	; r13 = bytes belonging to the 10th (last) block.
	sub	r13,16 * (10 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_108





	; Full last block: consume its 16 bytes and write 0 to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the reduced value in xmm14 into the first block, then multiply all
	; 10 blocks by the key material at [192+rdx], [256+rdx] (4 lanes each) and
	; [320+rdx] (2 lanes).
	; NOTE(review): these look like precomputed hash-key powers -- confirm.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Sum the partial products of the first two groups.
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Add the third group's products to the running sums.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them across the high (zmm0) and
	; low (zmm3) accumulators.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes of each accumulator down to a single 128-bit value.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First fold of the low half with POLY2.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second fold; xmm14 = folded low half XOR high half.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_108
$L$_small_initial_partial_block_108:








	; Partial last block: record the leftover byte count at [r8] and save the
	; last output block (xmm11) at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the 9 complete blocks; the key offsets are 16 bytes higher
	; than in the full-block path ([208+rdx], [272+rdx] and [336+rdx]).
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Lane reduction and POLY2 folding, same sequence as the full-block path.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_108:

	; If bytes remain in a partial block (r13 != 0), XOR the byte-shuffled
	; partial block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_108
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_108:
	jmp	NEAR $L$_last_blocks_done_88
$L$_last_num_blocks_is_11_88:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_109
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_109

$L$_16_blocks_overflow_109:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
; Main body of the 11-block tail. AES rounds on the counter blocks in
; zmm0/zmm3/zmm4 are interleaved with carry-less multiplies of operand pairs
; loaded from the stack frame. Ten AES rounds are applied (9 vaesenc plus
; vaesenclast, round keys at [rcx + 0 .. 160]).
; NOTE(review): rcx presumably points at the AES key schedule and rdx at the
; GCM context holding hash-key powers -- confirm from the function prologue.
$L$_16_blocks_ok_109:




	; Broadcast the 16 bytes at [rcx] to all four 128-bit lanes of zmm30.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Copy lane 2 of zmm4 (the 11th counter block) into every lane of zmm2.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial whitening: XOR the counter blocks with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four partial products of the first stack pair:
	; 0x11 = high*high, 0x00 = low*low, 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-input XOR: accumulate matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at [r9 + r11]; the third vector is loaded under mask k1
	; with zeroing, so bytes outside the mask read as 0.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in zmm24/zmm25/zmm26, which are written outside this chunk
	; (presumably running high/low/middle accumulators -- TODO confirm).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term per 128-bit lane: upper 8 bytes go to the high
	; part (zmm15), lower 8 bytes go to the low part (zmm10).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; XOR the four 128-bit lanes of zmm14 (high) and zmm7 (low) down to
	; xmm14 and xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Reduce xmm14:xmm7 with the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round, then XOR the result with the input data.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; Keep the unmasked 11th output block; the partial-block path stores it.
	vextracti32x4	xmm11,zmm4,2
	; Output pointer is read from the stack slot at [rbp + 120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 outside k1 before they are hashed.
	vmovdqu8	zmm4{k1}{z},zmm4
	; The vectors hashed next are the OUTPUT vectors, shuffled by zmm29.
	; NOTE(review): this matches an encrypt direction -- confirm from the
	; enclosing function.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,2
	; r13 -= 160: what is left belongs to the 11th block.
	sub	r13,16 * (11 - 1)


	; Signed compare: fewer than 16 left means the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_110





	; Last block is full: r13 becomes 0 and 0 is stored at [r8].
	sub	r13,16
	mov	QWORD[r8],0
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply all 11 blocks by the 16-byte table entries at
	; [rdx + 176 .. 336] (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third multiplier: entries at 304/320 in lanes 0-1, entry at 336 in lane 2.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_110
; 11-block tail, last block partial (r13 < 16 on entry): record the partial
; state, hash only the first 10 blocks here, then XOR xmm7 into the result at
; the join point below if r13 is non-zero.
$L$_small_initial_partial_block_110:








	; Store the leftover byte count at [r8] and xmm11 at [rdx + 16].
	; NOTE(review): presumably consumed by a later call that completes the
	; partial block -- confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply 10 blocks by the table entries at [rdx + 192 .. 336]
	; (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Only the low two lanes of the third vector are multiplied (ymm width).
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point for the full-block and partial-block paths.
$L$_small_initial_compute_done_110:

	; r13 == 0 (full last block): xmm14 is final. Otherwise XOR in xmm7, the
	; shuffled and masked last block extracted before the branch.
	or	r13,r13
	je	NEAR $L$_after_reduction_110
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_110:
	jmp	NEAR $L$_last_blocks_done_88
; Entry for the "12 blocks" tail case: loads the byte mask k1 and builds three
; vectors of counter blocks in zmm0, zmm3 and zmm4 from zmm2.
$L$_last_num_blocks_is_12_88:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries).
	; NOTE(review): r13 is presumably the number of bytes still to process;
	; the 128 bias matches the third 64-byte vector -- confirm at the caller.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; 244 = 256 - 12. NOTE(review): r15d looks like the low byte of the block
	; counter; values >= 244 (unsigned) take the wrap-safe path -- confirm.
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_111
	; Direct path: packed dword adds of the increment vectors in zmm28/zmm27
	; (both are set up outside this chunk).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_111

; Wrap-safe path: shuffle zmm2 with the byte mask in zmm29, add the
; ddq_add_1234 / ddq_add_4444 constants, then shuffle each result back
; with the same mask.
$L$_16_blocks_overflow_111:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
; Main body of the 12-block tail. AES rounds on the counter blocks in
; zmm0/zmm3/zmm4 are interleaved with carry-less multiplies of operand pairs
; loaded from the stack frame. Ten AES rounds are applied (9 vaesenc plus
; vaesenclast, round keys at [rcx + 0 .. 160]).
; NOTE(review): rcx presumably points at the AES key schedule and rdx at the
; GCM context holding hash-key powers -- confirm from the function prologue.
$L$_16_blocks_ok_111:




	; Broadcast the 16 bytes at [rcx] to all four 128-bit lanes of zmm30.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Copy lane 3 of zmm4 (the 12th counter block) into every lane of zmm2.
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial whitening: XOR the counter blocks with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four partial products of the first stack pair:
	; 0x11 = high*high, 0x00 = low*low, 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-input XOR: accumulate matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at [r9 + r11]; the third vector is loaded under mask k1
	; with zeroing, so bytes outside the mask read as 0.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in zmm24/zmm25/zmm26, which are written outside this chunk
	; (presumably running high/low/middle accumulators -- TODO confirm).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term per 128-bit lane: upper 8 bytes go to the high
	; part (zmm15), lower 8 bytes go to the low part (zmm10).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; XOR the four 128-bit lanes of zmm14 (high) and zmm7 (low) down to
	; xmm14 and xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Reduce xmm14:xmm7 with the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round, then XOR the result with the input data.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; Keep the unmasked 12th output block; the partial-block path stores it.
	vextracti32x4	xmm11,zmm4,3
	; Output pointer is read from the stack slot at [rbp + 120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 outside k1 before they are hashed.
	vmovdqu8	zmm4{k1}{z},zmm4
	; The vectors hashed next are the OUTPUT vectors, shuffled by zmm29.
	; NOTE(review): this matches an encrypt direction -- confirm from the
	; enclosing function.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,3
	; r13 -= 176: what is left belongs to the 12th block.
	sub	r13,16 * (12 - 1)


	; Signed compare: fewer than 16 left means the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_112





	; Last block is full: r13 becomes 0 and 0 is stored at [r8].
	sub	r13,16
	mov	QWORD[r8],0
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply all 12 blocks by the 16-byte table entries at
	; [rdx + 160 .. 336] (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each vpternlogq XORs three like terms together.
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_112
; 12-block tail, last block partial (r13 < 16 on entry): record the partial
; state, hash only the first 11 blocks here, then XOR xmm7 into the result at
; the join point below if r13 is non-zero.
$L$_small_initial_partial_block_112:








	; Store the leftover byte count at [r8] and xmm11 at [rdx + 16].
	; NOTE(review): presumably consumed by a later call that completes the
	; partial block -- confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply 11 blocks by the table entries at [rdx + 176 .. 336]
	; (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third multiplier: entries at 304/320 in lanes 0-1, entry at 336 in
	; lane 2; lane 3 is zero after the ymm load, so it adds nothing.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point for the full-block and partial-block paths.
$L$_small_initial_compute_done_112:

	; r13 == 0 (full last block): xmm14 is final. Otherwise XOR in xmm7, the
	; shuffled and masked last block extracted before the branch.
	or	r13,r13
	je	NEAR $L$_after_reduction_112
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_112:
	jmp	NEAR $L$_last_blocks_done_88
; Entry for the "13 blocks" tail case: loads the byte mask k1 and builds the
; counter blocks in zmm0, zmm3, zmm4 plus one more block in xmm5.
$L$_last_num_blocks_is_13_88:
	; k1 = byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	; NOTE(review): r13 is presumably the number of bytes still to process;
	; the 192 bias matches the fourth 64-byte vector -- confirm at the caller.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 243 = 256 - 13. NOTE(review): r15d looks like the low byte of the block
	; counter; values >= 243 (unsigned) take the wrap-safe path -- confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_113
	; Direct path: packed dword adds of the increment vectors in zmm28/zmm27
	; (both are set up outside this chunk); only one lane is needed in xmm5.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_113

; Wrap-safe path: shuffle zmm2 with the byte mask in zmm29, add the
; ddq_add_1234 / ddq_add_4444 constants, then shuffle the results back
; (only the low 128 bits of the fourth vector are shuffled back).
$L$_16_blocks_overflow_113:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
; Main body of the 13-block tail. AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/xmm5 are interleaved with carry-less multiplies of operand
; pairs loaded from the stack frame. Ten AES rounds are applied (9 vaesenc
; plus vaesenclast, round keys at [rcx + 0 .. 160]).
; NOTE(review): rcx presumably points at the AES key schedule and rdx at the
; GCM context holding hash-key powers -- confirm from the function prologue.
$L$_16_blocks_ok_113:




	; Broadcast the 16 bytes at [rcx] to all four 128-bit lanes of zmm30.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Copy lane 0 of zmm5 (the 13th counter block) into every lane of zmm2.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial whitening: XOR the counter blocks with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four partial products of the first stack pair:
	; 0x11 = high*high, 0x00 = low*low, 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-input XOR: accumulate matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at [r9 + r11]; the fourth piece is a 16-byte load under
	; mask k1 with zeroing, so bytes outside the mask read as 0.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in zmm24/zmm25/zmm26, which are written outside this chunk
	; (presumably running high/low/middle accumulators -- TODO confirm).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term per 128-bit lane: upper 8 bytes go to the high
	; part (zmm15), lower 8 bytes go to the low part (zmm10).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	; XOR the four 128-bit lanes of zmm14 (high) and zmm7 (low) down to
	; xmm14 and xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Reduce xmm14:xmm7 with the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round, then XOR the result with the input data.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; Keep the unmasked 13th output block; the partial-block path stores it.
	vextracti32x4	xmm11,zmm5,0
	; Output pointer is read from the stack slot at [rbp + 120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes of zmm5 outside k1 before they are hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	; The vectors hashed next are the OUTPUT vectors, shuffled by zmm29.
	; NOTE(review): this matches an encrypt direction -- confirm from the
	; enclosing function.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	vextracti32x4	xmm7,zmm21,0
	; r13 -= 192: what is left belongs to the 13th block.
	sub	r13,16 * (13 - 1)


	; Signed compare: fewer than 16 left means the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_114





	; Last block is full: r13 becomes 0 and 0 is stored at [r8].
	sub	r13,16
	mov	QWORD[r8],0
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply all 13 blocks by the 16-byte table entries at
	; [rdx + 144 .. 336] (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each vpternlogq XORs three like terms together.
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; The 13th block (xmm21) is multiplied by the entry at [rdx + 336].
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_114
; 13-block tail, last block partial (r13 < 16 on entry): record the partial
; state, hash only the first 12 blocks here, then XOR xmm7 into the result at
; the join point below if r13 is non-zero.
$L$_small_initial_partial_block_114:








	; Store the leftover byte count at [r8] and xmm11 at [rdx + 16].
	; NOTE(review): presumably consumed by a later call that completes the
	; partial block -- confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply 12 blocks by the table entries at [rdx + 160 .. 336]
	; (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each vpternlogq XORs three like terms together.
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point for the full-block and partial-block paths.
$L$_small_initial_compute_done_114:

	; r13 == 0 (full last block): xmm14 is final. Otherwise XOR in xmm7, the
	; shuffled and masked last block extracted before the branch.
	or	r13,r13
	je	NEAR $L$_after_reduction_114
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_114:
	jmp	NEAR $L$_last_blocks_done_88
; Entry for the "14 blocks" tail case: loads the byte mask k1 and builds the
; counter blocks in zmm0, zmm3, zmm4 plus two more blocks in ymm5.
$L$_last_num_blocks_is_14_88:
	; k1 = byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	; NOTE(review): r13 is presumably the number of bytes still to process;
	; the 192 bias matches the fourth 64-byte vector -- confirm at the caller.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 242 = 256 - 14. NOTE(review): r15d looks like the low byte of the block
	; counter; values >= 242 (unsigned) take the wrap-safe path -- confirm.
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_115
	; Direct path: packed dword adds of the increment vectors in zmm28/zmm27
	; (both are set up outside this chunk); two lanes are needed in ymm5.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_115

; Wrap-safe path: shuffle zmm2 with the byte mask in zmm29, add the
; ddq_add_1234 / ddq_add_4444 constants, then shuffle the results back
; (only the low 256 bits of the fourth vector are shuffled back).
$L$_16_blocks_overflow_115:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
; Main body of the 14-block tail. AES rounds on the counter blocks in
; zmm0/zmm3/zmm4/ymm5 are interleaved with carry-less multiplies of operand
; pairs loaded from the stack frame. Ten AES rounds are applied (9 vaesenc
; plus vaesenclast, round keys at [rcx + 0 .. 160]).
; NOTE(review): rcx presumably points at the AES key schedule and rdx at the
; GCM context holding hash-key powers -- confirm from the function prologue.
$L$_16_blocks_ok_115:




	; Broadcast the 16 bytes at [rcx] to all four 128-bit lanes of zmm30.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Copy lane 1 of zmm5 (the 14th counter block) into every lane of zmm2.
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial whitening: XOR the counter blocks with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four partial products of the first stack pair:
	; 0x11 = high*high, 0x00 = low*low, 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-input XOR: accumulate matching partial products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at [r9 + r11]; the fourth piece is a 32-byte load under
	; mask k1 with zeroing, so bytes outside the mask read as 0.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in zmm24/zmm25/zmm26, which are written outside this chunk
	; (presumably running high/low/middle accumulators -- TODO confirm).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term per 128-bit lane: upper 8 bytes go to the high
	; part (zmm15), lower 8 bytes go to the low part (zmm10).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	; XOR the four 128-bit lanes of zmm14 (high) and zmm7 (low) down to
	; xmm14 and xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	; Reduce xmm14:xmm7 with the POLY2 constant; result in xmm14.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round, then XOR the result with the input data.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	; Keep the unmasked 14th output block; the partial-block path stores it.
	vextracti32x4	xmm11,zmm5,1
	; Output pointer is read from the stack slot at [rbp + 120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes of zmm5 outside k1 before they are hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	; The vectors hashed next are the OUTPUT vectors, shuffled by zmm29.
	; NOTE(review): this matches an encrypt direction -- confirm from the
	; enclosing function.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	vextracti32x4	xmm7,zmm21,1
	; r13 -= 208: what is left belongs to the 14th block.
	sub	r13,16 * (14 - 1)


	; Signed compare: fewer than 16 left means the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_116





	; Last block is full: r13 becomes 0 and 0 is stored at [r8].
	sub	r13,16
	mov	QWORD[r8],0
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply all 14 blocks by the 16-byte table entries at
	; [rdx + 128 .. 336] (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each vpternlogq XORs three like terms together.
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 (ymm21) are multiplied by the entries at [rdx + 320/336].
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_116
; 14-block tail, last block partial (r13 < 16 on entry): record the partial
; state, hash only the first 13 blocks here, then XOR xmm7 into the result at
; the join point below if r13 is non-zero.
$L$_small_initial_partial_block_116:








	; Store the leftover byte count at [r8] and xmm11 at [rdx + 16].
	; NOTE(review): presumably consumed by a later call that completes the
	; partial block -- confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; XOR the value in zmm14 into the first vector.
	vpxorq	zmm17,zmm17,zmm14
	; Multiply 13 blocks by the table entries at [rdx + 144 .. 336]
	; (presumably hash-key powers -- TODO confirm layout).
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each vpternlogq XORs three like terms together.
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only lane 0 of the fourth vector (block 13) is multiplied (xmm width).
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine the cross terms and split them into the high/low parts.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four lanes down to xmm0 (high) and xmm3 (low).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduce xmm0:xmm3 with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point for the full-block and partial-block paths.
$L$_small_initial_compute_done_116:

	; r13 == 0 (full last block): xmm14 is final. Otherwise XOR in xmm7, the
	; shuffled and masked last block extracted before the branch.
	or	r13,r13
	je	NEAR $L$_after_reduction_116
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_116:
	jmp	NEAR $L$_last_blocks_done_88
; Entry for the "15 blocks" tail case: loads the byte mask k1 and builds four
; vectors of counter blocks in zmm0, zmm3, zmm4 and zmm5 from zmm2.
$L$_last_num_blocks_is_15_88:
	; k1 = byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	; NOTE(review): r13 is presumably the number of bytes still to process;
	; the 192 bias matches the fourth 64-byte vector -- confirm at the caller.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 241 = 256 - 15. NOTE(review): r15d looks like the low byte of the block
	; counter; values >= 241 (unsigned) take the wrap-safe path -- confirm.
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_117
	; Direct path: packed dword adds of the increment vectors in zmm28/zmm27
	; (both are set up outside this chunk).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_117

; Wrap-safe path: shuffle zmm2 with the byte mask in zmm29, add the
; ddq_add_1234 / ddq_add_4444 constants, then shuffle each result back
; with the same mask.
$L$_16_blocks_overflow_117:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_117:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	vextracti32x4	xmm11,zmm5,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	vextracti32x4	xmm7,zmm21,2
	sub	r13,16 * (15 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_118





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_118
; Partial-final-block path of the 15-block case: reached by the `jl` taken
; when r13 < 16 after the per-case length adjustment.  Hashes the first
; 14 blocks held in zmm17/zmm19/zmm20/ymm21 and leaves the reduced result in
; xmm14; the (partial) 15th block, kept in xmm7, is XORed in afterwards.
$L$_small_initial_partial_block_118:








	; Record the leftover byte count at [r8] and save xmm11 at [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved last
	; output block in the GCM context -- confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Fold the running accumulator (xmm14) into the first data vector.
	vpxorq	zmm17,zmm17,zmm14
	; Carry-less multiply each data vector by the key vector loaded from the
	; table at rdx (offsets 128/192/256/320).  Selector 0x11 = high*high,
	; 0x00 = low*low, 0x01/0x10 = cross terms.
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	; vpternlogq with 0x96 is a three-input XOR: accumulate partial products.
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only two blocks of the last vector are hashed here, so a 256-bit
	; multiply is used (the EVEX ymm forms zero the upper half of zmm0/3/4/5).
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Merge all partial sums: zmm0 = high, zmm3 = low, zmm4/zmm5 = middle.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction of the 256-bit product using the POLY2 constant.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit for both the full-block and partial-block paths of this case.
$L$_small_initial_compute_done_118:

	; If bytes remain in a partial block (r13 != 0), XOR that block (xmm7)
	; into the accumulator; `or r13,r13` only sets flags, r13 is unchanged.
	or	r13,r13
	je	NEAR $L$_after_reduction_118
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_118:
	jmp	NEAR $L$_last_blocks_done_88
; 16-block tail case: encrypts four 64-byte counter vectors (zmm0, zmm3, zmm4,
; zmm5) with 10 AES rounds (round keys at rcx+0 .. rcx+160) while multiplying
; the vectors buffered at [1280..1472+rsp] by the key vectors at
; [512..704+rsp].  The fourth input/output vector is byte-masked with k1.
; On exit zmm17/19/20/21 hold the byte-shuffled output blocks for hashing and
; xmm14 holds the reduced hash of the buffered blocks.
$L$_last_num_blocks_is_16_88:
	; k1 = byte64_len_to_mask_table[r13 - 192]: byte mask for the fourth
	; 64-byte vector (the first three vectors cover 192 bytes).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d >= 240 selects the slow counter path; presumably r15d
	; tracks the low counter byte and adding 16 would wrap it -- confirm.
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_119
	; Fast path: derive the four counter vectors by adding zmm28 then zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_119

; Slow path: byte-shuffle the counter with zmm29, add ddq_add_1234 /
; ddq_add_4444, then shuffle every vector back.
$L$_16_blocks_overflow_119:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_119:




	; zmm30/zmm31 alternate as the broadcast round key for successive rounds.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Keep the last counter block (lane 3 of zmm5) broadcast in zmm2.
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Round 0: XOR with the first round key.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply buffered vector 0 by its key vector (0x11 high, 0x00 low,
	; 0x01/0x10 cross terms).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Buffered vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Buffered vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: accumulate the products of vectors 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11; the fourth vector is masked and zero-filled.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Buffered vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term into the high (zmm14) and low (zmm7) halves.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Horizontal XOR of the four lanes, then reduce with POLY2 into xmm14.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round, then XOR the keystream with the input.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = last output block (lane 3), taken before masking.
	vextracti32x4	xmm11,zmm5,3
	; r10 = output pointer reloaded from [rbp+120]; store at r10+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 beyond the mask so they do not enter the hash.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output blocks with zmm29 for hashing.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled last block; r13 = bytes belonging to that last block.
	vextracti32x4	xmm7,zmm21,3
	sub	r13,16 * (16 - 1)
; Hash step of the 16-block case.  The 16th block is always handled as the
; trailing block here: the first 15 blocks in zmm17/zmm19/zmm20/zmm21 are
; multiplied by the key vectors at rdx+112/176/240/304(+336), reduced into
; xmm14, and the 16th block (xmm7) is XORed in unconditionally at the end.
$L$_small_initial_partial_block_120:








	; Record the trailing byte count at [r8] and save xmm11 at [rdx+16].
	; NOTE(review): presumably partial-block bookkeeping in the GCM context --
	; confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Fold the running accumulator (xmm14) into the first data vector.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	; 0x96 = three-input XOR of the partial products.
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Build a three-key vector: two keys from rdx+304 plus one from rdx+336
	; inserted as lane 2 (lane 3 stays zero, so the 16th block contributes 0).
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Merge all partial sums: zmm0 = high, zmm3 = low, zmm4/zmm5 = middle.
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_120:
	; XOR the 16th (trailing) block into the accumulator.
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_120:
	jmp	NEAR $L$_last_blocks_done_88
; Zero-block tail case: nothing is left to encrypt, so only the hash of the
; four vectors buffered at [1280..1472+rsp] is completed.  Each is multiplied
; by the key vector at [512..704+rsp], the products are added to the sums
; carried in zmm24 (high), zmm25 (low) and zmm26 (middle), and the total is
; reduced into xmm14.
$L$_last_num_blocks_is_0_88:
	; Buffered vector 0 (0x11 high, 0x00 low, 0x01/0x10 cross terms).
	vmovdqa64	zmm13,ZMMWORD[1280+rsp]
	vmovdqu64	zmm12,ZMMWORD[512+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Buffered vector 1.
	vmovdqa64	zmm13,ZMMWORD[1344+rsp]
	vmovdqu64	zmm12,ZMMWORD[576+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; 0x96 = three-input XOR: accumulate into zmm24/zmm25/zmm26.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Buffered vector 2.
	vmovdqa64	zmm13,ZMMWORD[1408+rsp]
	vmovdqu64	zmm12,ZMMWORD[640+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Buffered vector 3.
	vmovdqa64	zmm13,ZMMWORD[1472+rsp]
	vmovdqu64	zmm12,ZMMWORD[704+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle term across the high and low halves.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	; Horizontal XOR of the four 128-bit lanes down to xmm24 / xmm25.
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Join point for every `_88` tail case: byte-shuffle the counter block in
; xmm2 with xmm29 and continue at the common hash-done label.
$L$_last_blocks_done_88:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_10

; Entry for the "below 32 blocks" remainder: accounts for 256 bytes
; (r13 -= 256, r11 += 256) and, unless r14 is already non-zero, derives four
; more 64-byte key vectors on the stack.  Each new vector is an existing one
; multiplied by the broadcast lane 0 of [640+rsp] and reduced with POLY2:
;   [448+rsp] = [576+rsp] * b      [384+rsp] = [512+rsp] * b
;   [320+rsp] = [448+rsp] * b      [256+rsp] = [384+rsp] * b
; NOTE(review): presumably these extend the on-stack table of hash-key
; powers -- confirm against the code that fills [512..704+rsp].
$L$_message_below_32_blocks_10:


	sub	r13,256
	add	r11,256
	; r10d keeps a copy of the remaining length for the code that follows.
	mov	r10d,r13d
	; r14 != 0 means the extra vectors already exist: skip the computation.
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_121
	vmovdqu64	zmm3,ZMMWORD[640+rsp]


	; Broadcast lane 0 of zmm3 to all four lanes (the multiplier b).
	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[576+rsp]
	vmovdqu64	zmm5,ZMMWORD[512+rsp]

	; zmm4 = zmm4 * b: carry-less multiply (high, low, two cross terms).
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	; Split the middle term across the high (zmm6) and low (zmm4) halves.
	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	; Two-phase reduction using the POLY2 constant.
	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	; 0x96 = three-input XOR combining both reduction phases with the high half.
	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm4

	; zmm5 = zmm5 * b, same multiply-and-reduce sequence.
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm5

	; zmm4 = (vector just stored at [448+rsp]) * b.
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm4

	; zmm5 = (vector just stored at [384+rsp]) * b.
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm5
; Dispatch on the number of remaining 16-byte blocks.
;   r14  = 1 (the path leading here tests r14 to skip the precomputation)
;   ebx  = 512 - (r10d & ~15): byte offset later used to index rsp
;   r10d = (r13d + 15) >> 4: remaining length in blocks, rounded up
; A small compare tree then jumps to the handler for 0..16 blocks; the
; 1-block handler is reached by fall-through.
$L$_skip_hkeys_precomputation_121:
	mov	r14,1
	; Round the length in r10d down to a multiple of 16.
	and	r10d,~15
	mov	ebx,512
	sub	ebx,r10d
	mov	r10d,r13d
	add	r10d,15
	; The shift sets ZF when the block count is zero.
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_122

	; First split at 8: equal -> 8, below -> 1..7, above -> 9..16.
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_122
	jb	NEAR $L$_last_num_blocks_is_7_1_122


	; 9..16: split at 12.
	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_122
	jb	NEAR $L$_last_num_blocks_is_11_9_122


	; 13..16.
	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_122
	ja	NEAR $L$_last_num_blocks_is_16_122
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_122
	jmp	NEAR $L$_last_num_blocks_is_13_122

; 9..11 blocks.
$L$_last_num_blocks_is_11_9_122:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_122
	ja	NEAR $L$_last_num_blocks_is_11_122
	jmp	NEAR $L$_last_num_blocks_is_9_122

; 1..7 blocks: split at 4.
$L$_last_num_blocks_is_7_1_122:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_122
	jb	NEAR $L$_last_num_blocks_is_3_1_122

	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_122
	je	NEAR $L$_last_num_blocks_is_6_122
	jmp	NEAR $L$_last_num_blocks_is_5_122

; 1..3 blocks: 3 and 2 jump away, 1 falls through to the next label.
$L$_last_num_blocks_is_3_1_122:

	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_122
	je	NEAR $L$_last_num_blocks_is_2_122
; 1-block tail case: encrypts one counter block (xmm0) with 10 AES rounds
; (round keys at rcx+0 .. rcx+160), interleaved with the hash multiplies of
; the four vectors buffered at [768..960+rsp] by the key vectors at
; [rsp+rbx .. rsp+rbx+192].  The unreduced sums are left in zmm24 (high),
; zmm25 (low) and zmm26 (middle).  If the block is complete (r13 >= 16) it
; is multiplied by the key at rdx+336 and reduced here; otherwise control
; goes to the partial-block path.
$L$_last_num_blocks_is_1_122:
	; k1 = byte64_len_to_mask_table[r13]: byte mask for the only block.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d >= 255 selects the slow counter path; presumably r15d
	; tracks the low counter byte and adding 1 would wrap it -- confirm.
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_123
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_123

; Slow path: byte-shuffle the counter with zmm29, add ddq_add_1234, and
; shuffle the single block in use back.
$L$_16_blocks_overflow_123:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
$L$_16_blocks_ok_123:




	; zmm30/zmm31 alternate as the broadcast round key for successive rounds.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running accumulator (zmm14) into the first buffered vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the last counter block (lane 0) broadcast in zmm2.
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the first round key.
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Buffered vector 0 (0x11 high, 0x00 low, 0x01/0x10 cross terms).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Buffered vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Buffered vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: accumulate the products of vectors 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11, masked and zero-filled.
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Buffered vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (unreduced).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	; Final AES round, then XOR the keystream with the input.
	vaesenclast	xmm0,xmm0,xmm30
	vpxorq	xmm0,xmm0,xmm17
	; xmm11 = output block, taken before masking.
	vextracti32x4	xmm11,zmm0,0
	; r10 = output pointer reloaded from [rbp+120]; masked store at r10+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm0{k1}{z},zmm0
	; Byte-shuffle the output block with xmm29 for hashing; keep it in xmm7.
	vpshufb	xmm17,xmm0,xmm29
	vextracti32x4	xmm7,zmm17,0


	; Fewer than 16 bytes in the block: take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_124





	; Full block: clear the partial-length slot at [r8] and hash the block.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_124
; Partial-block path of the 1-block case (r13 < 16).  No new full block is
; multiplied here: only the sums carried in zmm24 (high), zmm25 (low) and
; zmm26 (middle) are reduced into xmm14, after which the partial block (xmm7)
; is XORed into the result.
$L$_small_initial_partial_block_124:








	; Record the leftover byte count at [r8] and save xmm11 at [rdx+16].
	; NOTE(review): presumably partial-block bookkeeping in the GCM context --
	; confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


	; Split the middle term across the high and low halves.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	; Horizontal XOR of the four 128-bit lanes down to xmm24 / xmm25.
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm0,XMMWORD[POLY2]


	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












	; XOR the partial block into the accumulator.
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_124
; Both labels mark the same address: the exit of the 1-block case.
$L$_small_initial_compute_done_124:
$L$_after_reduction_124:
	jmp	NEAR $L$_last_blocks_done_122
; 2-block tail case: encrypts two counter blocks (ymm0) with 10 AES rounds
; (round keys at rcx+0 .. rcx+160), interleaved with the hash multiplies of
; the four vectors buffered at [768..960+rsp] by the key vectors at
; [rsp+rbx .. rsp+rbx+192].  The unreduced sums are left in zmm24 (high),
; zmm25 (low) and zmm26 (middle).  If the second block is complete, both
; blocks are multiplied by the keys at rdx+320 and reduced here; otherwise
; control goes to the partial-block path.
$L$_last_num_blocks_is_2_122:
	; k1 = byte64_len_to_mask_table[r13]: byte mask covering both blocks.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d >= 254 selects the slow counter path; presumably r15d
	; tracks the low counter byte and adding 2 would wrap it -- confirm.
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_125
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_125

; Slow path: byte-shuffle the counter with zmm29, add ddq_add_1234, and
; shuffle the two blocks in use back.
$L$_16_blocks_overflow_125:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_125:




	; zmm30/zmm31 alternate as the broadcast round key for successive rounds.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running accumulator (zmm14) into the first buffered vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the last counter block (lane 1) broadcast in zmm2.
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the first round key.
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Buffered vector 0 (0x11 high, 0x00 low, 0x01/0x10 cross terms).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Buffered vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Buffered vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: accumulate the products of vectors 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11, masked and zero-filled.
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Buffered vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (unreduced).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	; Final AES round, then XOR the keystream with the input.
	vaesenclast	ymm0,ymm0,ymm30
	vpxorq	ymm0,ymm0,ymm17
	; xmm11 = last output block (lane 1), taken before masking.
	vextracti32x4	xmm11,zmm0,1
	; r10 = output pointer reloaded from [rbp+120]; masked store at r10+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm0{k1}{z},zmm0
	; Byte-shuffle the output with ymm29 for hashing; xmm7 = last block.
	vpshufb	ymm17,ymm0,ymm29
	vextracti32x4	xmm7,zmm17,1
	; r13 = bytes belonging to the last block.
	sub	r13,16 * (2 - 1)


	; Fewer than 16 bytes in the last block: take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_126





	; Full last block: clear the partial-length slot at [r8], hash both blocks.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_126
; Partial-block path of the 2-block case (last block shorter than 16 bytes).
; Only the first block (xmm17) is multiplied, by the key at rdx+336; the
; product is added to the sums carried in zmm24/zmm25/zmm26 and reduced into
; xmm14.  The partial second block (xmm7) is XORed in afterwards.
$L$_small_initial_partial_block_126:








	; Record the leftover byte count at [r8] and save xmm11 at [rdx+16].
	; NOTE(review): presumably partial-block bookkeeping in the GCM context --
	; confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit for both the full-block and partial-block paths of this case.
$L$_small_initial_compute_done_126:

	; If bytes remain in a partial block (r13 != 0), XOR that block (xmm7)
	; into the accumulator; `or r13,r13` only sets flags, r13 is unchanged.
	or	r13,r13
	je	NEAR $L$_after_reduction_126
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_126:
	jmp	NEAR $L$_last_blocks_done_122
; 3-block tail case: encrypts three counter blocks (in zmm0) with 10 AES
; rounds (round keys at rcx+0 .. rcx+160), interleaved with the hash
; multiplies of the four vectors buffered at [768..960+rsp] by the key
; vectors at [rsp+rbx .. rsp+rbx+192].  The unreduced sums are left in zmm24
; (high), zmm25 (low) and zmm26 (middle).  If the third block is complete,
; all three blocks are multiplied by the keys at rdx+304/320/336 and reduced
; here; otherwise control goes to the partial-block path.
$L$_last_num_blocks_is_3_122:
	; k1 = byte64_len_to_mask_table[r13]: byte mask covering the three blocks.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d >= 253 selects the slow counter path; presumably r15d
	; tracks the low counter byte and adding 3 would wrap it -- confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_127
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_127

; Slow path: byte-shuffle the counter with zmm29, add ddq_add_1234, and
; shuffle back.
$L$_16_blocks_overflow_127:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_127:




	; zmm30/zmm31 alternate as the broadcast round key for successive rounds.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running accumulator (zmm14) into the first buffered vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the last counter block (lane 2) broadcast in zmm2.
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the first round key.
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Buffered vector 0 (0x11 high, 0x00 low, 0x01/0x10 cross terms).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Buffered vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Buffered vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: accumulate the products of vectors 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11, masked and zero-filled.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Buffered vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (unreduced).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	; Final AES round, then XOR the keystream with the input.
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block (lane 2), taken before masking.
	vextracti32x4	xmm11,zmm0,2
	; r10 = output pointer reloaded from [rbp+120]; masked store at r10+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm0{k1}{z},zmm0
	; Byte-shuffle the output with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,2
	; r13 = bytes belonging to the last block.
	sub	r13,16 * (3 - 1)


	; Fewer than 16 bytes in the last block: take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_128





	; Full last block: clear the partial-length slot at [r8], hash 3 blocks.
	sub	r13,16
	mov	QWORD[r8],0
	; Three-key vector: two keys from rdx+304 plus rdx+336 inserted as lane 2.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_128
; Partial-block path of the 3-block case (last block shorter than 16 bytes).
; Only the first two blocks (ymm17) are multiplied, by the keys at rdx+320;
; the products are added to the sums carried in zmm24/zmm25/zmm26 and reduced
; into xmm14.  The partial third block (xmm7) is XORed in afterwards.
$L$_small_initial_partial_block_128:








	; Record the leftover byte count at [r8] and save xmm11 at [rdx+16].
	; NOTE(review): presumably partial-block bookkeeping in the GCM context --
	; confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Split the middle term across the high and low halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit for both the full-block and partial-block paths of this case.
$L$_small_initial_compute_done_128:

	; If bytes remain in a partial block (r13 != 0), XOR that block (xmm7)
	; into the accumulator; `or r13,r13` only sets flags, r13 is unchanged.
	or	r13,r13
	je	NEAR $L$_after_reduction_128
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_128:
	jmp	NEAR $L$_last_blocks_done_122
; 4-block tail case: encrypts four counter blocks (zmm0) with 10 AES rounds
; (round keys at rcx+0 .. rcx+160), interleaved with the hash multiplies of
; the four vectors buffered at [768..960+rsp] by the key vectors at
; [rsp+rbx .. rsp+rbx+192].  The unreduced sums are left in zmm24 (high),
; zmm25 (low) and zmm26 (middle).  If the fourth block is complete, all four
; blocks are multiplied by the key vector at rdx+288 and reduced here;
; otherwise control goes to the partial-block path.
$L$_last_num_blocks_is_4_122:
	; k1 = byte64_len_to_mask_table[r13]: byte mask covering the four blocks.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d >= 252 selects the slow counter path; presumably r15d
	; tracks the low counter byte and adding 4 would wrap it -- confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_129
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_129

; Slow path: byte-shuffle the counter with zmm29, add ddq_add_1234, and
; shuffle back.
$L$_16_blocks_overflow_129:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_129:




	; zmm30/zmm31 alternate as the broadcast round key for successive rounds.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running accumulator (zmm14) into the first buffered vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the last counter block (lane 3) broadcast in zmm2.
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the first round key.
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Buffered vector 0 (0x11 high, 0x00 low, 0x01/0x10 cross terms).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Buffered vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Buffered vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: accumulate the products of vectors 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input at r9+r11, masked and zero-filled.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Buffered vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (unreduced).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	; Final AES round, then XOR the keystream with the input.
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block (lane 3), taken before masking.
	vextracti32x4	xmm11,zmm0,3
	; r10 = output pointer reloaded from [rbp+120]; masked store at r10+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm0{k1}{z},zmm0
	; Byte-shuffle the output with zmm29 for hashing; xmm7 = last block.
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,3
	; r13 = bytes belonging to the last block.
	sub	r13,16 * (4 - 1)


	; Fewer than 16 bytes in the last block: take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_130





	; Full last block: clear the partial-length slot at [r8], hash 4 blocks.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; Add the sums carried over from the buffered vectors.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle term across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; Horizontal XOR of the four 128-bit lanes down to xmm0 / xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction using the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_130
$L$_small_initial_partial_block_130:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_130:

	or	r13,r13
	je	NEAR $L$_after_reduction_130
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_130:
	jmp	NEAR $L$_last_blocks_done_122
; Tail case, 5 blocks left: four full 16-byte blocks (zmm0) plus a fifth,
; possibly partial, block (xmm3) are run through counter-mode AES using the
; 11 round keys at [rcx]..[160+rcx] (9 vaesenc + vaesenclast).  Between the
; AES rounds, GHASH partial products of the four 64-byte groups at
; [768+rsp]..[960+rsp] are computed, and the 5 new output blocks are hashed
; at the end.
; Register roles as used below (anything marked "presumably" is an
; assumption to confirm against the rest of the routine):
;   r13            remaining byte count (presumably 65..80 on entry)
;   r9+r11         address input is read from
;   [120+rbp]+r11  address output is written to
;   rdx            base of the 16-byte multipliers at [272+rdx]..[336+rdx]
;                  (presumably hash-key powers) and of the [16+rdx] save slot
;   r8             QWORD that receives the leftover partial-block byte count
;   zmm2           counter state in; last counter block, broadcast, out
;   zmm14          hash accumulator in; xmm14 = updated hash out
;   zmm29          vpshufb control (presumably the byte-reflection mask)
$L$_last_num_blocks_is_5_122:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask applied to the data
	; beyond the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 251 (256 - 5) selects the slower counter path; presumably r15d
	; tracks the counter's low byte and 5 more increments would carry out.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_131
	; Fast path: counter blocks 1-4 = zmm2 + zmm28, block 5 = that + xmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_131

$L$_16_blocks_overflow_131:
	; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_131:




	; Round-0 key -> zmm30.  zmm8 = hash accumulator XOR first stacked group.
	; zmm2 = the last counter block used (lane 0 of zmm3) copied to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	; AES round 0 (key XOR); preload operands of the second GHASH group.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; GHASH group 1: high (0x11), low (0x00) and two middle (0x01, 0x10)
	; carry-less products of zmm8 and zmm1; then AES round 1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; GHASH group 2 ([832+rsp] times [64+rbx+rsp]); AES round 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; GHASH group 3 ([896+rsp] times [128+rbx+rsp]); AES round 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the products of groups 1-3, kept as
	; separate high / low / middle sums; AES rounds 4 and 5.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the rest under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; GHASH group 4 ([960+rsp] times [192+rbx+rsp]); AES round 6.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four groups:
	; zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 and the final round.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	; xmm11 = last output block before masking; the partial-block path
	; below saves it at [16+rdx].
	vextracti32x4	xmm11,zmm3,0
	mov	r10,QWORD[120+rbp]
	; Store the output: 64 bytes unmasked, the remainder under k1.
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be hashed; xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	vextracti32x4	xmm7,zmm19,0
	; r13 = number of bytes in the last (5th) block.
	sub	r13,16 * (5 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_132





	; Last block is complete: r13 becomes 0 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply blocks 1-4 by the four 16-byte values at [272+rdx] and
	; block 5 by the one at [336+rdx].
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum per lane: its upper 8 bytes join the high
	; product (zmm0), its lower 8 bytes, shifted up, join the low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_132
$L$_small_initial_partial_block_132:








	; Partial last block: record its byte count at [r8] and save the
	; unmasked last output block at [16+rdx] (presumably so a later call
	; can finish the block - confirm with the caller).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Only the four complete blocks are multiplied here, by the values at
	; [288+rdx]; the partial block is XORed into the result further down.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle sum between the high and low products.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_132:

	; r13 != 0 means a partial block is pending: XOR its shuffled,
	; zero-padded bytes (xmm7) into the hash without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_132
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_132:
	jmp	NEAR $L$_last_blocks_done_122
; Tail case, 6 blocks left: four full 16-byte blocks (zmm0) plus two more
; in ymm3, the last of which may be partial, are run through counter-mode
; AES using the 11 round keys at [rcx]..[160+rcx] (9 vaesenc +
; vaesenclast).  Between the AES rounds, GHASH partial products of the four
; 64-byte groups at [768+rsp]..[960+rsp] are computed, and the 6 new output
; blocks are hashed at the end.
; Register roles as used below (anything marked "presumably" is an
; assumption to confirm against the rest of the routine):
;   r13            remaining byte count (presumably 81..96 on entry)
;   r9+r11         address input is read from
;   [120+rbp]+r11  address output is written to
;   rdx            base of the 16-byte multipliers at [256+rdx]..[336+rdx]
;                  (presumably hash-key powers) and of the [16+rdx] save slot
;   r8             QWORD that receives the leftover partial-block byte count
;   zmm2           counter state in; last counter block, broadcast, out
;   zmm14          hash accumulator in; xmm14 = updated hash out
;   zmm29          vpshufb control (presumably the byte-reflection mask)
$L$_last_num_blocks_is_6_122:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask applied to the data
	; beyond the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 250 (256 - 6) selects the slower counter path; presumably r15d
	; tracks the counter's low byte and 6 more increments would carry out.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_133
	; Fast path: counter blocks 1-4 = zmm2 + zmm28, blocks 5-6 = that + ymm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_133

$L$_16_blocks_overflow_133:
	; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_133:




	; Round-0 key -> zmm30.  zmm8 = hash accumulator XOR first stacked group.
	; zmm2 = the last counter block used (lane 1 of zmm3) copied to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	; AES round 0 (key XOR); preload operands of the second GHASH group.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; GHASH group 1: high (0x11), low (0x00) and two middle (0x01, 0x10)
	; carry-less products of zmm8 and zmm1; then AES round 1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; GHASH group 2 ([832+rsp] times [64+rbx+rsp]); AES round 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; GHASH group 3 ([896+rsp] times [128+rbx+rsp]); AES round 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the products of groups 1-3, kept as
	; separate high / low / middle sums; AES rounds 4 and 5.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the rest under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; GHASH group 4 ([960+rsp] times [192+rbx+rsp]); AES round 6.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four groups:
	; zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 and the final round.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	; xmm11 = last output block before masking; the partial-block path
	; below saves it at [16+rdx].
	vextracti32x4	xmm11,zmm3,1
	mov	r10,QWORD[120+rbp]
	; Store the output: 64 bytes unmasked, the remainder under k1.
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be hashed; xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	vextracti32x4	xmm7,zmm19,1
	; r13 = number of bytes in the last (6th) block.
	sub	r13,16 * (6 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_134





	; Last block is complete: r13 becomes 0 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply blocks 1-4 by the four 16-byte values at [256+rdx] and
	; blocks 5-6 by the two at [320+rdx].
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum per lane: its upper 8 bytes join the high
	; product (zmm0), its lower 8 bytes, shifted up, join the low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_134
$L$_small_initial_partial_block_134:








	; Partial last block: record its byte count at [r8] and save the
	; unmasked last output block at [16+rdx] (presumably so a later call
	; can finish the block - confirm with the caller).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Only the five complete blocks are multiplied here: blocks 1-4 by the
	; values at [272+rdx], block 5 by the one at [336+rdx].  The xmm-wide
	; multiply leaves the partial 6th block out; it is XORed in further down.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum between the high and low products.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_134:

	; r13 != 0 means a partial block is pending: XOR its shuffled,
	; zero-padded bytes (xmm7) into the hash without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_134
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_134:
	jmp	NEAR $L$_last_blocks_done_122
; Tail case, 7 blocks left: four full 16-byte blocks (zmm0) plus three more
; in zmm3, the last of which may be partial, are run through counter-mode
; AES using the 11 round keys at [rcx]..[160+rcx] (9 vaesenc +
; vaesenclast).  Between the AES rounds, GHASH partial products of the four
; 64-byte groups at [768+rsp]..[960+rsp] are computed, and the 7 new output
; blocks are hashed at the end.
; Register roles as used below (anything marked "presumably" is an
; assumption to confirm against the rest of the routine):
;   r13            remaining byte count (presumably 97..112 on entry)
;   r9+r11         address input is read from
;   [120+rbp]+r11  address output is written to
;   rdx            base of the 16-byte multipliers at [240+rdx]..[336+rdx]
;                  (presumably hash-key powers) and of the [16+rdx] save slot
;   r8             QWORD that receives the leftover partial-block byte count
;   zmm2           counter state in; last counter block, broadcast, out
;   zmm14          hash accumulator in; xmm14 = updated hash out
;   zmm29          vpshufb control (presumably the byte-reflection mask)
$L$_last_num_blocks_is_7_122:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask applied to the data
	; beyond the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 249 (256 - 7) selects the slower counter path; presumably r15d
	; tracks the counter's low byte and 7 more increments would carry out.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_135
	; Fast path: counter blocks 1-4 = zmm2 + zmm28, next group = that + zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_135

$L$_16_blocks_overflow_135:
	; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_135:




	; Round-0 key -> zmm30.  zmm8 = hash accumulator XOR first stacked group.
	; zmm2 = the last counter block used (lane 2 of zmm3) copied to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	; AES round 0 (key XOR); preload operands of the second GHASH group.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; GHASH group 1: high (0x11), low (0x00) and two middle (0x01, 0x10)
	; carry-less products of zmm8 and zmm1; then AES round 1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; GHASH group 2 ([832+rsp] times [64+rbx+rsp]); AES round 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; GHASH group 3 ([896+rsp] times [128+rbx+rsp]); AES round 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the products of groups 1-3, kept as
	; separate high / low / middle sums; AES rounds 4 and 5.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the rest under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; GHASH group 4 ([960+rsp] times [192+rbx+rsp]); AES round 6.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four groups:
	; zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 and the final round.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block before masking; the partial-block path
	; below saves it at [16+rdx].
	vextracti32x4	xmm11,zmm3,2
	mov	r10,QWORD[120+rbp]
	; Store the output: 64 bytes unmasked, the remainder under k1.
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be hashed; xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,2
	; r13 = number of bytes in the last (7th) block.
	sub	r13,16 * (7 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_136





	; Last block is complete: r13 becomes 0 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply blocks 1-4 by the four 16-byte values at [240+rdx] and
	; blocks 5-7 by the three at [304+rdx]..[336+rdx] (two loaded as a ymm,
	; the third inserted as lane 2; lane 3 of zmm1 stays zero).
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum per lane: its upper 8 bytes join the high
	; product (zmm0), its lower 8 bytes, shifted up, join the low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_136
$L$_small_initial_partial_block_136:








	; Partial last block: record its byte count at [r8] and save the
	; unmasked last output block at [16+rdx] (presumably so a later call
	; can finish the block - confirm with the caller).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Only the six complete blocks are multiplied here: blocks 1-4 by the
	; values at [256+rdx], blocks 5-6 by the two at [320+rdx].  The ymm-wide
	; multiply leaves the partial 7th block out; it is XORed in further down.
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum between the high and low products.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_136:

	; r13 != 0 means a partial block is pending: XOR its shuffled,
	; zero-padded bytes (xmm7) into the hash without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_136
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_136:
	jmp	NEAR $L$_last_blocks_done_122
; Tail case, 8 blocks left: four full 16-byte blocks (zmm0) plus four more
; in zmm3, the last of which may be partial, are run through counter-mode
; AES using the 11 round keys at [rcx]..[160+rcx] (9 vaesenc +
; vaesenclast).  Between the AES rounds, GHASH partial products of the four
; 64-byte groups at [768+rsp]..[960+rsp] are computed, and the 8 new output
; blocks are hashed at the end.
; Register roles as used below (anything marked "presumably" is an
; assumption to confirm against the rest of the routine):
;   r13            remaining byte count (presumably 113..128 on entry)
;   r9+r11         address input is read from
;   [120+rbp]+r11  address output is written to
;   rdx            base of the 16-byte multipliers at [224+rdx]..[336+rdx]
;                  (presumably hash-key powers) and of the [16+rdx] save slot
;   r8             QWORD that receives the leftover partial-block byte count
;   zmm2           counter state in; last counter block, broadcast, out
;   zmm14          hash accumulator in; xmm14 = updated hash out
;   zmm29          vpshufb control (presumably the byte-reflection mask)
$L$_last_num_blocks_is_8_122:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask applied to the data
	; beyond the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 248 (256 - 8) selects the slower counter path; presumably r15d
	; tracks the counter's low byte and 8 more increments would carry out.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_137
	; Fast path: counter blocks 1-4 = zmm2 + zmm28, blocks 5-8 = that + zmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_137

$L$_16_blocks_overflow_137:
	; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_137:




	; Round-0 key -> zmm30.  zmm8 = hash accumulator XOR first stacked group.
	; zmm2 = the last counter block used (lane 3 of zmm3) copied to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	; AES round 0 (key XOR); preload operands of the second GHASH group.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; GHASH group 1: high (0x11), low (0x00) and two middle (0x01, 0x10)
	; carry-less products of zmm8 and zmm1; then AES round 1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; GHASH group 2 ([832+rsp] times [64+rbx+rsp]); AES round 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; GHASH group 3 ([896+rsp] times [128+rbx+rsp]); AES round 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the products of groups 1-3, kept as
	; separate high / low / middle sums; AES rounds 4 and 5.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 64 bytes unmasked, the rest under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; GHASH group 4 ([960+rsp] times [192+rbx+rsp]); AES round 6.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four groups:
	; zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 and the final round.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block before masking; the partial-block path
	; below saves it at [16+rdx].
	vextracti32x4	xmm11,zmm3,3
	mov	r10,QWORD[120+rbp]
	; Store the output: 64 bytes unmasked, the remainder under k1.
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be hashed; xmm7 = shuffled last block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,3
	; r13 = number of bytes in the last (8th) block.
	sub	r13,16 * (8 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_138





	; Last block is complete: r13 becomes 0 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply blocks 1-4 by the four 16-byte values at [224+rdx] and
	; blocks 5-8 by the four at [288+rdx], then XOR matching products.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle sum per lane: its upper 8 bytes join the high
	; product (-> zmm0), its lower 8 bytes, shifted up, join the low (-> zmm3).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_138
$L$_small_initial_partial_block_138:








	; Partial last block: record its byte count at [r8] and save the
	; unmasked last output block at [16+rdx] (presumably so a later call
	; can finish the block - confirm with the caller).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Only the seven complete blocks are multiplied here: blocks 1-4 by the
	; values at [240+rdx], blocks 5-7 by those at [304+rdx]..[336+rdx].
	; Lane 3 of zmm1 stays zero, so the partial 8th block contributes
	; nothing to the products; it is XORed in further down.
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum between the high and low products.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_138:

	; r13 != 0 means a partial block is pending: XOR its shuffled,
	; zero-padded bytes (xmm7) into the hash without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_138
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_138:
	jmp	NEAR $L$_last_blocks_done_122
; Tail case, 9 blocks left: eight full 16-byte blocks (zmm0, zmm3) plus a
; ninth, possibly partial, block (xmm4) are run through counter-mode AES
; using the 11 round keys at [rcx]..[160+rcx] (9 vaesenc + vaesenclast).
; Between the AES rounds, GHASH partial products of the four 64-byte groups
; at [768+rsp]..[960+rsp] are computed, and the 9 new output blocks are
; hashed at the end.
; Register roles as used below (anything marked "presumably" is an
; assumption to confirm against the rest of the routine):
;   r13            remaining byte count (presumably 129..144 on entry)
;   r9+r11         address input is read from
;   [120+rbp]+r11  address output is written to
;   rdx            base of the 16-byte multipliers at [208+rdx]..[336+rdx]
;                  (presumably hash-key powers) and of the [16+rdx] save slot
;   r8             QWORD that receives the leftover partial-block byte count
;   zmm2           counter state in; last counter block, broadcast, out
;   zmm14          hash accumulator in; xmm14 = updated hash out
;   zmm29          vpshufb control (presumably the byte-reflection mask)
$L$_last_num_blocks_is_9_122:
	; k1 = byte64_len_to_mask_table[r13 - 128]: byte mask applied to the
	; data beyond the first 128 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 247 (256 - 9) selects the slower counter path; presumably r15d
	; tracks the counter's low byte and 9 more increments would carry out.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_139
	; Fast path: blocks 1-4 = zmm2 + zmm28, blocks 5-8 = that + zmm27,
	; block 9 = blocks 5-8 low lane + xmm27.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_139

$L$_16_blocks_overflow_139:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 once and
	; ddq_add_4444 twice, then shuffle all three results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_139:




	; Round-0 key -> zmm30.  zmm8 = hash accumulator XOR first stacked group.
	; zmm2 = the last counter block used (lane 0 of zmm4) copied to all lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	; AES round 0 (key XOR); preload operands of the second GHASH group.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; GHASH group 1: high (0x11), low (0x00) and two middle (0x01, 0x10)
	; carry-less products of zmm8 and zmm1; then AES round 1.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; GHASH group 2 ([832+rsp] times [64+rbx+rsp]); AES round 2.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; GHASH group 3 ([896+rsp] times [128+rbx+rsp]); AES round 3.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of the products of groups 1-3, kept as
	; separate high / low / middle sums; AES rounds 4 and 5.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, the rest under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; GHASH group 4 ([960+rsp] times [192+rbx+rsp]); AES round 6.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four groups:
	; zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; AES rounds 7-9 and the final round.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
	; xmm11 = last output block before masking; the partial-block path
	; below saves it at [16+rdx].
	vextracti32x4	xmm11,zmm4,0
	mov	r10,QWORD[120+rbp]
	; Store the output: 128 bytes unmasked, the remainder under k1.
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be hashed; xmm7 = shuffled last block.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	vextracti32x4	xmm7,zmm20,0
	; r13 = number of bytes in the last (9th) block.
	sub	r13,16 * (9 - 1)


	; Fewer than 16 bytes: the last block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_140





	; Last block is complete: r13 becomes 0 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply blocks 1-4 by the four 16-byte values at [208+rdx], blocks
	; 5-8 by the four at [272+rdx], and block 9 by the one at [336+rdx].
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the middle sum per lane: its upper 8 bytes join the high
	; product (zmm0), its lower 8 bytes, shifted up, join the low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_140
$L$_small_initial_partial_block_140:








	; Partial last block: record its byte count at [r8] and save the
	; unmasked last output block at [16+rdx] (presumably so a later call
	; can finish the block - confirm with the caller).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Only the eight complete blocks are multiplied here: blocks 1-4 by the
	; values at [224+rdx], blocks 5-8 by those at [288+rdx].  The partial
	; 9th block is XORed into the result further down.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Add the deferred sums: middle (zmm26), high (zmm24), low (zmm25).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the middle sum between the high and low products.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together: xmm0 = high, xmm3 = low.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 to 128 bits with the POLY2 constant -> xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_140:

	; r13 != 0 means a partial block is pending: XOR its shuffled,
	; zero-padded bytes (xmm7) into the hash without multiplying.
	or	r13,r13
	je	NEAR $L$_after_reduction_140
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_140:
	jmp	NEAR $L$_last_blocks_done_122
$L$_last_num_blocks_is_10_122:
	; Tail handler for a final chunk of 10 blocks: two full 64-byte vectors
	; plus a 32-byte vector whose live bytes are selected by k1.
	; NOTE(review): r13 appears to be the number of bytes still to process
	; (it indexes a length-to-mask table here and is compared with 16
	; below) - confirm against the dispatcher that jumps to this label.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	; k1 = 8-byte table entry number (r13 - 128): byte mask for the third vector.
	kmovq	k1,[rax*8+r10]
	; r15d >= 246 (= 256 - 10, unsigned) selects the slower counter path.
	; NOTE(review): presumably r15d tracks the low counter byte, so adding
	; 10 would carry out of it - confirm.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_141
	; Fast path: counter blocks are produced with plain dword adds of
	; zmm28 / zmm27 (both set up outside this block) starting from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_141

$L$_16_blocks_overflow_141:
	; Carry path: shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle every result with zmm29 again.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_141:




	; zmm30 = the 16 bytes at [rcx] replicated into all four lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at rsp+768; zmm1 = 64 bytes at rsp+rbx.
	; NOTE(review): presumably zmm14 is the running hash, rsp+768.. holds
	; 16 buffered blocks and rsp+rbx.. the matching hash-key powers - confirm.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 1 of zmm4 (the 10th counter block) copied to every lane.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR the counter blocks with the key material from [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four 64x64 carry-less partial products of zmm8 and zmm1
	; (hi*hi, lo*lo and the two cross terms), per 128-bit lane.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; From here the vaesenc rounds use the keys at rcx+16 .. rcx+144 and
	; vaesenclast uses rcx+160, the same schedule as $L$aes_128_0 at the
	; top of the file; zmm30/zmm31 alternate as round-key registers.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair (rsp+832, rsp+rbx+64).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair (rsp+896, rsp+rbx+128).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate the first three pairs' products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source data at r9+r11; the last vector is loaded under k1
	; with zeroing, so bytes outside the mask are not read into ymm20.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair (rsp+960, rsp+rbx+192).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four stack pairs:
	; zmm24 = hi*hi terms, zmm25 = lo*lo terms, zmm26 = all cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
	; XOR the cipher output with the loaded data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
	; xmm11 = lane 1 of the last result vector, taken before the bytes
	; outside k1 are cleared below.
	vextracti32x4	xmm11,zmm4,1
	; r10 = destination base pointer reloaded from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
	; Zero the bytes of zmm4 that lie outside k1.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Shuffle the result vectors with zmm29 into zmm17/zmm19/ymm20, the
	; inputs of the multiplies below.
	; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	; xmm7 = lane 1 of the shuffled last vector (block 10).
	vextracti32x4	xmm7,zmm20,1
	; r13 -= 144, the size of the first nine blocks.
	sub	r13,16 * (10 - 1)


	; Signed compare: fewer than 16 left means block 10 is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_142





	; Block 10 is complete: r13 -= 16 and zero is stored to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all ten shuffled blocks by the operands at rdx+192 .. rdx+351
	; (64 + 64 + 32 bytes).
	; NOTE(review): presumably the hash-key powers stored in the context.
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Combine with the sums carried in zmm24 (hi), zmm25 (lo), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms: upper 8 bytes go to the hi sum, lower 8 bytes
	; (shifted up) go to the lo sum.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 (hi) and xmm3 (lo).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the
	; reduced 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_142
$L$_small_initial_partial_block_142:
	; Reached from the 10-block handler when r13 < 16 after the first nine
	; blocks were subtracted, i.e. block 10 is incomplete.








	; Store r13 at [r8] and xmm11 (lane 1 of the last result vector) at
	; [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved block
	; that a later call uses to finish block 10 - confirm against the
	; context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only nine blocks: the operands are read from rdx+208 ..
	; rdx+351 (64 + 64 + 16 bytes) and the last multiply is 128 bits wide,
	; so lane 1 of zmm20 (block 10) takes no part in it.
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the sums carried in zmm24 / zmm25 / zmm26 (0x96 = three-way XOR).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms (zmm4) between the hi sum zmm0 and lo sum zmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 and xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_142:

	; Common exit of the full and partial paths: when r13 is non-zero,
	; XOR xmm7 (the shuffled block 10) into the reduced value in xmm14.
	; NOTE(review): this looks like deferring the multiply of an incomplete
	; block until it has been completed by a later call - confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_142
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_142:
	jmp	NEAR $L$_last_blocks_done_122
$L$_last_num_blocks_is_11_122:
	; Tail handler for a final chunk of 11 blocks: two full 64-byte vectors
	; plus a third vector whose live bytes are selected by k1.
	; NOTE(review): r13 appears to be the number of bytes still to process
	; (it indexes a length-to-mask table here and is compared with 16
	; below) - confirm against the dispatcher that jumps to this label.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	; k1 = 8-byte table entry number (r13 - 128): byte mask for the third vector.
	kmovq	k1,[rax*8+r10]
	; r15d >= 245 (= 256 - 11, unsigned) selects the slower counter path.
	; NOTE(review): presumably r15d tracks the low counter byte - confirm.
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_143
	; Fast path: counter blocks via plain dword adds of zmm28 / zmm27
	; (both set up outside this block) starting from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_143

$L$_16_blocks_overflow_143:
	; Carry path: shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle every result with zmm29 again.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_143:




	; zmm30 = the 16 bytes at [rcx] replicated into all four lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at rsp+768; zmm1 = 64 bytes at rsp+rbx.
	; NOTE(review): presumably zmm14 is the running hash, rsp+768.. holds
	; 16 buffered blocks and rsp+rbx.. the matching hash-key powers - confirm.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 2 of zmm4 (the 11th counter block) copied to every lane.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR the counter blocks with the key material from [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four 64x64 carry-less partial products of zmm8 and zmm1
	; (hi*hi, lo*lo and the two cross terms), per 128-bit lane.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; From here the vaesenc rounds use the keys at rcx+16 .. rcx+144 and
	; vaesenclast uses rcx+160, the same schedule as $L$aes_128_0 at the
	; top of the file; zmm30/zmm31 alternate as round-key registers.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair (rsp+832, rsp+rbx+64).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair (rsp+896, rsp+rbx+128).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate the first three pairs' products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source data at r9+r11; the last vector is loaded under k1
	; with zeroing, so bytes outside the mask are not read into zmm20.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair (rsp+960, rsp+rbx+192).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four stack pairs:
	; zmm24 = hi*hi terms, zmm25 = lo*lo terms, zmm26 = all cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the cipher output with the loaded data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = lane 2 of the last result vector, taken before the bytes
	; outside k1 are cleared below.
	vextracti32x4	xmm11,zmm4,2
	; r10 = destination base pointer reloaded from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 that lie outside k1.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Shuffle the result vectors with zmm29 into zmm17/zmm19/zmm20, the
	; inputs of the multiplies below.
	; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = lane 2 of the shuffled last vector (block 11).
	vextracti32x4	xmm7,zmm20,2
	; r13 -= 160, the size of the first ten blocks.
	sub	r13,16 * (11 - 1)


	; Signed compare: fewer than 16 left means block 11 is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_144





	; Block 11 is complete: r13 -= 16 and zero is stored to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all eleven shuffled blocks by the operands at rdx+176 ..
	; rdx+351.
	; NOTE(review): presumably the hash-key powers stored in the context.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third operand: 32 bytes from rdx+304 in lanes 0-1 and 16 bytes from
	; rdx+336 inserted as lane 2; lane 3 stays zero after the ymm load.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Combine with the sums carried in zmm24 (hi), zmm25 (lo), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms: upper 8 bytes go to the hi sum, lower 8 bytes
	; (shifted up) go to the lo sum.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 (hi) and xmm3 (lo).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the
	; reduced 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_144
$L$_small_initial_partial_block_144:
	; Reached from the 11-block handler when r13 < 16 after the first ten
	; blocks were subtracted, i.e. block 11 is incomplete.








	; Store r13 at [r8] and xmm11 (lane 2 of the last result vector) at
	; [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved block
	; that a later call uses to finish block 11 - confirm against the
	; context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only ten blocks: the operands are read from rdx+192 ..
	; rdx+351 (64 + 64 + 32 bytes) and the last multiply is 256 bits wide,
	; so lane 2 of zmm20 (block 11) takes no part in it.
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Add the sums carried in zmm24 / zmm25 / zmm26 (0x96 = three-way XOR).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms (zmm4) between the hi sum zmm0 and lo sum zmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 and xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_144:

	; Common exit of the full and partial paths: when r13 is non-zero,
	; XOR xmm7 (the shuffled block 11) into the reduced value in xmm14.
	; NOTE(review): this looks like deferring the multiply of an incomplete
	; block until it has been completed by a later call - confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_144
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_144:
	jmp	NEAR $L$_last_blocks_done_122
$L$_last_num_blocks_is_12_122:
	; Tail handler for a final chunk of 12 blocks: two full 64-byte vectors
	; plus a third vector whose live bytes are selected by k1.
	; NOTE(review): r13 appears to be the number of bytes still to process
	; (it indexes a length-to-mask table here and is compared with 16
	; below) - confirm against the dispatcher that jumps to this label.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	; k1 = 8-byte table entry number (r13 - 128): byte mask for the third vector.
	kmovq	k1,[rax*8+r10]
	; r15d >= 244 (= 256 - 12, unsigned) selects the slower counter path.
	; NOTE(review): presumably r15d tracks the low counter byte - confirm.
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_145
	; Fast path: counter blocks via plain dword adds of zmm28 / zmm27
	; (both set up outside this block) starting from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_145

$L$_16_blocks_overflow_145:
	; Carry path: shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle every result with zmm29 again.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_145:




	; zmm30 = the 16 bytes at [rcx] replicated into all four lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at rsp+768; zmm1 = 64 bytes at rsp+rbx.
	; NOTE(review): presumably zmm14 is the running hash, rsp+768.. holds
	; 16 buffered blocks and rsp+rbx.. the matching hash-key powers - confirm.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 3 of zmm4 (the 12th counter block) copied to every lane.
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR the counter blocks with the key material from [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four 64x64 carry-less partial products of zmm8 and zmm1
	; (hi*hi, lo*lo and the two cross terms), per 128-bit lane.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; From here the vaesenc rounds use the keys at rcx+16 .. rcx+144 and
	; vaesenclast uses rcx+160, the same schedule as $L$aes_128_0 at the
	; top of the file; zmm30/zmm31 alternate as round-key registers.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair (rsp+832, rsp+rbx+64).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair (rsp+896, rsp+rbx+128).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate the first three pairs' products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source data at r9+r11; the last vector is loaded under k1
	; with zeroing, so bytes outside the mask are not read into zmm20.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair (rsp+960, rsp+rbx+192).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four stack pairs:
	; zmm24 = hi*hi terms, zmm25 = lo*lo terms, zmm26 = all cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the cipher output with the loaded data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = lane 3 of the last result vector, taken before the bytes
	; outside k1 are cleared below.
	vextracti32x4	xmm11,zmm4,3
	; r10 = destination base pointer reloaded from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 that lie outside k1.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Shuffle the result vectors with zmm29 into zmm17/zmm19/zmm20, the
	; inputs of the multiplies below.
	; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = lane 3 of the shuffled last vector (block 12).
	vextracti32x4	xmm7,zmm20,3
	; r13 -= 176, the size of the first eleven blocks.
	sub	r13,16 * (12 - 1)


	; Signed compare: fewer than 16 left means block 12 is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_146





	; Block 12 is complete: r13 -= 16 and zero is stored to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all twelve shuffled blocks by the three 64-byte operands at
	; rdx+160 .. rdx+351.
	; NOTE(review): presumably the hash-key powers stored in the context.
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each 0x96 ternlog XORs three partial sums at once.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Add the sums carried in zmm26 (cross), zmm24 (hi) and zmm25 (lo).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the cross terms: upper 8 bytes go to the hi sum (zmm0), lower
	; 8 bytes (shifted up) go to the lo sum (zmm3).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together down to xmm0 (hi) and xmm3 (lo).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the
	; reduced 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_146
$L$_small_initial_partial_block_146:
	; Reached from the 12-block handler when r13 < 16 after the first
	; eleven blocks were subtracted, i.e. block 12 is incomplete.








	; Store r13 at [r8] and xmm11 (lane 3 of the last result vector) at
	; [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved block
	; that a later call uses to finish block 12 - confirm against the
	; context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only eleven blocks: the operands are read from rdx+176 ..
	; rdx+351.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Third operand: 32 bytes from rdx+304 in lanes 0-1 and 16 bytes from
	; rdx+336 inserted as lane 2.  Lane 3 of zmm1 stays zero after the ymm
	; load, so lane 3 of zmm20 (block 12) contributes nothing.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Add the sums carried in zmm24 / zmm25 / zmm26 (0x96 = three-way XOR).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms (zmm4) between the hi sum zmm0 and lo sum zmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 and xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_146:

	; Common exit of the full and partial paths: when r13 is non-zero,
	; XOR xmm7 (the shuffled block 12) into the reduced value in xmm14.
	; NOTE(review): this looks like deferring the multiply of an incomplete
	; block until it has been completed by a later call - confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_146
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_146:
	jmp	NEAR $L$_last_blocks_done_122
$L$_last_num_blocks_is_13_122:
	; Tail handler for a final chunk of 13 blocks: three full 64-byte
	; vectors plus a 16-byte vector whose live bytes are selected by k1.
	; NOTE(review): r13 appears to be the number of bytes still to process
	; (it indexes a length-to-mask table here and is compared with 16
	; below) - confirm against the dispatcher that jumps to this label.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	; k1 = 8-byte table entry number (r13 - 192): byte mask for the fourth vector.
	kmovq	k1,[rax*8+r10]
	; r15d >= 243 (= 256 - 13, unsigned) selects the slower counter path.
	; NOTE(review): presumably r15d tracks the low counter byte - confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_147
	; Fast path: counter blocks via plain dword adds of zmm28 / zmm27
	; (both set up outside this block) starting from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_147

$L$_16_blocks_overflow_147:
	; Carry path: shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle every result with zmm29 again.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
$L$_16_blocks_ok_147:




	; zmm30 = the 16 bytes at [rcx] replicated into all four lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at rsp+768; zmm1 = 64 bytes at rsp+rbx.
	; NOTE(review): presumably zmm14 is the running hash, rsp+768.. holds
	; 16 buffered blocks and rsp+rbx.. the matching hash-key powers - confirm.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 0 of zmm5 (the 13th counter block) copied to every lane.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR the counter blocks with the key material from [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four 64x64 carry-less partial products of zmm8 and zmm1
	; (hi*hi, lo*lo and the two cross terms), per 128-bit lane.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; From here the vaesenc rounds use the keys at rcx+16 .. rcx+144 and
	; vaesenclast uses rcx+160, the same schedule as $L$aes_128_0 at the
	; top of the file; zmm30/zmm31 alternate as round-key registers.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair (rsp+832, rsp+rbx+64).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair (rsp+896, rsp+rbx+128).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate the first three pairs' products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source data at r9+r11; the last vector is loaded under k1
	; with zeroing, so bytes outside the mask are not read into xmm21.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair (rsp+960, rsp+rbx+192).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four stack pairs:
	; zmm24 = hi*hi terms, zmm25 = lo*lo terms, zmm26 = all cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	; XOR the cipher output with the loaded data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; xmm11 = lane 0 of the last result vector, taken before the bytes
	; outside k1 are cleared below.
	vextracti32x4	xmm11,zmm5,0
	; r10 = destination base pointer reloaded from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes of zmm5 that lie outside k1.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Shuffle the result vectors with zmm29 into zmm17/zmm19/zmm20/xmm21,
	; the inputs of the multiplies below.
	; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	; xmm7 = lane 0 of the shuffled last vector (block 13).
	vextracti32x4	xmm7,zmm21,0
	; r13 -= 192, the size of the first twelve blocks.
	sub	r13,16 * (13 - 1)


	; Signed compare: fewer than 16 left means block 13 is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_148





	; Block 13 is complete: r13 -= 16 and zero is stored to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all thirteen shuffled blocks by the operands at rdx+144 ..
	; rdx+351 (3 x 64 + 16 bytes).
	; NOTE(review): presumably the hash-key powers stored in the context.
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each 0x96 ternlog XORs three partial sums at once.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Block 13 (xmm21) times the 16 bytes at rdx+336.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	; Combine with the sums carried in zmm24 (hi), zmm25 (lo), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms: upper 8 bytes go to the hi sum, lower 8 bytes
	; (shifted up) go to the lo sum.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 (hi) and xmm3 (lo).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the
	; reduced 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_148
$L$_small_initial_partial_block_148:
	; Reached from the 13-block handler when r13 < 16 after the first
	; twelve blocks were subtracted, i.e. block 13 is incomplete.








	; Store r13 at [r8] and xmm11 (lane 0 of the last result vector) at
	; [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved block
	; that a later call uses to finish block 13 - confirm against the
	; context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only the twelve blocks held in zmm17/zmm19/zmm20 by the
	; three 64-byte operands at rdx+160 .. rdx+351; zmm21 (block 13) is
	; not multiplied on this path.
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each 0x96 ternlog XORs three partial sums at once.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Add the sums carried in zmm26 (cross), zmm24 (hi) and zmm25 (lo).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the cross terms: upper 8 bytes go to the hi sum (zmm0), lower
	; 8 bytes (shifted up) go to the lo sum (zmm3).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together down to xmm0 and xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_148:

	; Common exit of the full and partial paths: when r13 is non-zero,
	; XOR xmm7 (the shuffled block 13) into the reduced value in xmm14.
	; NOTE(review): this looks like deferring the multiply of an incomplete
	; block until it has been completed by a later call - confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_148
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_148:
	jmp	NEAR $L$_last_blocks_done_122
$L$_last_num_blocks_is_14_122:
	; Tail handler for a final chunk of 14 blocks: three full 64-byte
	; vectors plus a 32-byte vector whose live bytes are selected by k1.
	; NOTE(review): r13 appears to be the number of bytes still to process
	; (it indexes a length-to-mask table here and is compared with 16
	; below) - confirm against the dispatcher that jumps to this label.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	; k1 = 8-byte table entry number (r13 - 192): byte mask for the fourth vector.
	kmovq	k1,[rax*8+r10]
	; r15d >= 242 (= 256 - 14, unsigned) selects the slower counter path.
	; NOTE(review): presumably r15d tracks the low counter byte - confirm.
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_149
	; Fast path: counter blocks via plain dword adds of zmm28 / zmm27
	; (both set up outside this block) starting from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_149

$L$_16_blocks_overflow_149:
	; Carry path: shuffle zmm2 with zmm29, add the ddq_add_1234 and
	; ddq_add_4444 constants, then shuffle every result with zmm29 again.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
$L$_16_blocks_ok_149:




	; zmm30 = the 16 bytes at [rcx] replicated into all four lanes.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at rsp+768; zmm1 = 64 bytes at rsp+rbx.
	; NOTE(review): presumably zmm14 is the running hash, rsp+768.. holds
	; 16 buffered blocks and rsp+rbx.. the matching hash-key powers - confirm.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 1 of zmm5 (the 14th counter block) copied to every lane.
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR the counter blocks with the key material from [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Four 64x64 carry-less partial products of zmm8 and zmm1
	; (hi*hi, lo*lo and the two cross terms), per 128-bit lane.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; From here the vaesenc rounds use the keys at rcx+16 .. rcx+144 and
	; vaesenclast uses rcx+160, the same schedule as $L$aes_128_0 at the
	; top of the file; zmm30/zmm31 alternate as round-key registers.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products of the second stack pair (rsp+832, rsp+rbx+64).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products of the third stack pair (rsp+896, rsp+rbx+128).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 is a three-way XOR: accumulate the first three pairs' products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source data at r9+r11; the last vector is loaded under k1
	; with zeroing, so bytes outside the mask are not read into ymm21.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products of the fourth stack pair (rsp+960, rsp+rbx+192).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums over all four stack pairs:
	; zmm24 = hi*hi terms, zmm25 = lo*lo terms, zmm26 = all cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	; XOR the cipher output with the loaded data.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	; xmm11 = lane 1 of the last result vector, taken before the bytes
	; outside k1 are cleared below.
	vextracti32x4	xmm11,zmm5,1
	; r10 = destination base pointer reloaded from [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes of zmm5 that lie outside k1.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Shuffle the result vectors with zmm29 into zmm17/zmm19/zmm20/ymm21,
	; the inputs of the multiplies below.
	; NOTE(review): zmm29 is presumably a byte-reversal mask - confirm.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	; xmm7 = lane 1 of the shuffled last vector (block 14).
	vextracti32x4	xmm7,zmm21,1
	; r13 -= 208, the size of the first thirteen blocks.
	sub	r13,16 * (14 - 1)


	; Signed compare: fewer than 16 left means block 14 is incomplete.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_150





	; Block 14 is complete: r13 -= 16 and zero is stored to [r8].
	; NOTE(review): [r8] presumably holds the pending partial-block length.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all fourteen shuffled blocks by the operands at rdx+128 ..
	; rdx+351 (3 x 64 + 32 bytes).
	; NOTE(review): presumably the hash-key powers stored in the context.
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each 0x96 ternlog XORs three partial sums at once.
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 (ymm21) times the 32 bytes at rdx+320.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Combine with the sums carried in zmm24 (hi), zmm25 (lo), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms: upper 8 bytes go to the hi sum, lower 8 bytes
	; (shifted up) go to the lo sum.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 (hi) and xmm3 (lo).
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the
	; reduced 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_150
$L$_small_initial_partial_block_150:
	; Reached from the 14-block handler when r13 < 16 after the first
	; thirteen blocks were subtracted, i.e. block 14 is incomplete.








	; Store r13 at [r8] and xmm11 (lane 1 of the last result vector) at
	; [rdx+16].
	; NOTE(review): presumably the partial-block length and the saved block
	; that a later call uses to finish block 14 - confirm against the
	; context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Multiply only thirteen blocks: the operands are read from rdx+144 ..
	; rdx+351 (3 x 64 + 16 bytes) and the last multiply is 128 bits wide,
	; so lane 1 of zmm21 (block 14) takes no part in it.
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products;
	; each 0x96 ternlog XORs three partial sums at once.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Block 13 (lane 0 of zmm21) times the 16 bytes at rdx+336.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	; Add the sums carried in zmm24 / zmm25 / zmm26 (0x96 = three-way XOR).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms (zmm4) between the hi sum zmm0 and lo sum zmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together down to xmm0 and xmm3.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-step reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_150:

	; Common exit of the full and partial paths: when r13 is non-zero,
	; XOR xmm7 (the shuffled block 14) into the reduced value in xmm14.
	; NOTE(review): this looks like deferring the multiply of an incomplete
	; block until it has been completed by a later call - confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_150
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_150:
	jmp	NEAR $L$_last_blocks_done_122
; ---------------------------------------------------------------------
; $L$_last_num_blocks_is_15_122
; Final-pass handler for the case where 15 counter blocks are produced
; (the 15th block may be shorter than 16 bytes).
; Values that are set up before this label are described by how they are
; used here; confirm their exact meaning against the function prologue:
;   r13        remaining byte count (reduced by 14*16 below)
;   rcx        round-key table, read at offsets 0,16,...,160
;   r9+r11     input address; [120+rbp]+r11 output address
;   rdx        context: 16 bytes are stored at offset 16, hash-key
;              entries are read from offsets 112..351
;   r8         receives the length of a trailing partial block (0 if none)
;   zmm2       counter block(s), zmm29 byte-shuffle mask
;   xmm14      running hash value (updated on exit)
;   zmm24/25/26  partial products carried into the final reduction
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_15_122:
	; k1 = byte64_len_to_mask_table[r13 - 192] (8-byte entries). It masks the
	; fourth 64-byte vector of the input load and the output store below.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 241 (= 256 - 15) selects the path that byte-shuffles the counter
	; before adding. Presumably r15d tracks the low counter byte so that a
	; carry out of it is handled -- confirm against the setup code.
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_151
	; Fast path: derive four counter vectors by adding zmm28, then zmm27
	; three times (presumably the +1..+4 and +4 increments -- confirm).
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_151

$L$_16_blocks_overflow_151:
	; Slow path: shuffle zmm2 with zmm29, add the ddq_add_1234 / ddq_add_4444
	; constants, then shuffle all four results back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_151:




	; From here AES rounds on zmm0/3/4/5 are interleaved with carry-less
	; multiplies of the four 64-byte vectors at [768..960+rsp] by the vectors
	; at [rbx+rsp .. 192+rbx+rsp] (presumably previously stashed blocks and
	; the matching hash-key powers -- confirm against the caller).
	; Round keys are broadcast to all four lanes, alternating zmm30/zmm31.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR first stashed vector
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; New zmm2 = lane 2 of zmm5 (the 15th counter block) copied to all lanes.
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the key at offset 0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First stashed vector: hi*hi (0x11), lo*lo (0x00) and the two cross
	; products (0x01, 0x10).
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second stashed vector.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third stashed vector.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; vpternlogq with 0x96 is a three-way XOR: fold the products of the first
	; three vectors together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: three full vectors, the fourth masked by k1 with
	; zeroing of the unselected bytes.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth stashed vector.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; zmm24 = sum of hi*hi products, zmm25 = sum of lo*lo products,
	; zmm26 = sum of all cross products; consumed by the reduction below.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Last round uses the key at offset 160 (nine vaesenc rounds precede it).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; XOR the cipher output with the input vectors.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = 15th output block, taken before the mask is applied.
	vextracti32x4	xmm11,zmm5,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside k1 before it is fed to the hash.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the four output vectors with zmm29 for hashing.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled (and masked) 15th block.
	vextracti32x4	xmm7,zmm21,2
	; r13 = bytes belonging to the 15th block.
	sub	r13,16 * (15 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_152





	; --- 15th block is complete: hash all 15 blocks ---------------------
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply by the hash-key entries at 112..351 from rdx (15 entries).
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Three entries at 304, 320 and 336: a 32-byte load plus a 16-byte insert
	; into lane 2, so nothing beyond offset 351 is read.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Add in the products carried in zmm24/25/26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross-product sum across the high (zmm0) and low (zmm3)
	; halves, then XOR the four 128-bit lanes of each down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit value xmm0:xmm3 with the POLY2 constant; the result
	; is left in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_152
$L$_small_initial_partial_block_152:








	; --- 15th block is short: record it, hash only the first 14 ---------
	; Store its length through r8 and the unmasked output block at [16+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Hash-key entries start one slot later (128 instead of 112) because one
	; block fewer is multiplied here.
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only the two low lanes of zmm21 (blocks 13 and 14) are multiplied.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same lane folding and POLY2 reduction as in the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_152:

	; If bytes of a short block remain (r13 != 0), XOR that shuffled block
	; (xmm7) into the hash value without multiplying it here.
	or	r13,r13
	je	NEAR $L$_after_reduction_152
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_152:
	jmp	NEAR $L$_last_blocks_done_122
; ---------------------------------------------------------------------
; $L$_last_num_blocks_is_16_122
; Final-pass handler for the case where 16 counter blocks are produced.
; Same structure as the 15-block handler, except that the 16th block is
; always routed through the "partial block" bookkeeping: its length is
; stored through r8, its output block is stored at [16+rdx], and it is
; XORed into xmm14 instead of being multiplied here.
; Roles of registers set up before this label are inferred from use;
; confirm against the function prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_16_122:
	; k1 = byte64_len_to_mask_table[r13 - 192]: byte mask for the fourth
	; 64-byte vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 240 (= 256 - 16) selects the shuffle-add-shuffle counter path
	; (presumably to handle a carry out of the low counter byte -- confirm).
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_153
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_153

$L$_16_blocks_overflow_153:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_153:




	; AES rounds on zmm0/3/4/5 (round keys broadcast from rcx, alternating
	; zmm30/zmm31) interleaved with carry-less multiplies of the vectors at
	; [768..960+rsp] by those at [rbx+rsp .. 192+rbx+rsp].
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; New zmm2 = lane 3 of zmm5 (the 16th counter block) copied to all lanes.
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Round 0: XOR with the key at offset 0.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-way XOR of the product terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Input: three full vectors, the fourth masked by k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; zmm24 / zmm25 / zmm26 = summed hi*hi / lo*lo / cross products.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Last round uses the key at offset 160.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = 16th output block, taken before the mask is applied.
	vextracti32x4	xmm11,zmm5,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside k1 before hashing.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled (and masked) 16th block.
	vextracti32x4	xmm7,zmm21,3
	; r13 = bytes belonging to the 16th block.
	sub	r13,16 * (16 - 1)
$L$_small_initial_partial_block_154:








	; Entered unconditionally (no length test in this handler): record the
	; 16th block's length and output block, then hash the first 15 blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Entries at 304, 320, 336 in lanes 0..2; lane 3 of zmm1 is zero, so the
	; 16th block in lane 3 of zmm21 contributes nothing to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Fold the cross products into the high/low halves, XOR the four lanes
	; of each down to one, then reduce with POLY2 into xmm14.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_154:
	; XOR the shuffled 16th block into the hash value (unconditional here).
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_154:
	jmp	NEAR $L$_last_blocks_done_122
; ---------------------------------------------------------------------
; $L$_last_num_blocks_is_0_122
; Handler for the case where no further blocks are processed: no AES work
; is done. The four 64-byte vectors at [768..960+rsp] are multiplied
; (carry-less) by the vectors at [rbx+rsp .. 192+rbx+rsp], the products
; are summed and reduced with POLY2, and the result is left in xmm14.
; The stack vectors are presumably blocks stashed by an earlier pass and
; their matching hash-key powers -- confirm against the code that fills
; them.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_0_122:
	; First vector: XOR zmm14 into it before multiplying.
	vmovdqa64	zmm13,ZMMWORD[768+rsp]
	vpxorq	zmm13,zmm13,zmm14
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	; 0x11 = hi*hi, 0x00 = lo*lo, 0x01 / 0x10 = cross products.
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Second vector.
	vmovdqa64	zmm13,ZMMWORD[832+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; Accumulators: zmm24 = hi*hi sum, zmm25 = lo*lo sum, zmm26 = cross sum.
	vpxorq	zmm26,zmm4,zmm10
	vpxorq	zmm24,zmm0,zmm6
	vpxorq	zmm25,zmm3,zmm7
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Third vector.
	vmovdqa64	zmm13,ZMMWORD[896+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	; Fourth vector.
	vmovdqa64	zmm13,ZMMWORD[960+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	; 0x96 = three-way XOR: add the third and fourth vectors' products.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the cross sum across the high (zmm24) and low (zmm25) halves,
	; then XOR the four 128-bit lanes of each down to a single lane.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Reduce xmm24:xmm25 with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96
; Common exit of the $L$_last_num_blocks_is_*_122 handlers: byte-shuffle
; the counter block in xmm2 with the mask in xmm29, then continue at
; $L$_ghash_done_10.
$L$_last_blocks_done_122:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_10

; ---------------------------------------------------------------------
; $L$_message_below_equal_16_blocks_10
; Dispatch on the number of 16-byte blocks needed for r13d bytes:
;   r12d = (r13d + 15) >> 4   (byte count rounded up to whole blocks)
; A small compare tree (pivots 8, 12, 4) jumps to the matching
; $L$_small_initial_num_blocks_is_N_155 handler. If no compare matches in
; the final group, execution falls through to the 1-block handler that
; follows this code.
; ---------------------------------------------------------------------
$L$_message_below_equal_16_blocks_10:


	; The 32-bit mov zero-extends, so the 64-bit compares on r12 below see
	; the same value.
	mov	r12d,r13d
	add	r12d,15
	shr	r12d,4
	cmp	r12,8
	je	NEAR $L$_small_initial_num_blocks_is_8_155
	jl	NEAR $L$_small_initial_num_blocks_is_7_1_155


	; r12 > 8 here.
	cmp	r12,12
	je	NEAR $L$_small_initial_num_blocks_is_12_155
	jl	NEAR $L$_small_initial_num_blocks_is_11_9_155


	; r12 > 12 here: 16, 15, 14, otherwise 13.
	cmp	r12,16
	je	NEAR $L$_small_initial_num_blocks_is_16_155
	cmp	r12,15
	je	NEAR $L$_small_initial_num_blocks_is_15_155
	cmp	r12,14
	je	NEAR $L$_small_initial_num_blocks_is_14_155
	jmp	NEAR $L$_small_initial_num_blocks_is_13_155

$L$_small_initial_num_blocks_is_11_9_155:

	; 8 < r12 < 12: 11, 10, otherwise 9.
	cmp	r12,11
	je	NEAR $L$_small_initial_num_blocks_is_11_155
	cmp	r12,10
	je	NEAR $L$_small_initial_num_blocks_is_10_155
	jmp	NEAR $L$_small_initial_num_blocks_is_9_155

$L$_small_initial_num_blocks_is_7_1_155:
	; r12 < 8 here.
	cmp	r12,4
	je	NEAR $L$_small_initial_num_blocks_is_4_155
	jl	NEAR $L$_small_initial_num_blocks_is_3_1_155

	; 4 < r12 < 8: 7, 6, otherwise 5.
	cmp	r12,7
	je	NEAR $L$_small_initial_num_blocks_is_7_155
	cmp	r12,6
	je	NEAR $L$_small_initial_num_blocks_is_6_155
	jmp	NEAR $L$_small_initial_num_blocks_is_5_155

$L$_small_initial_num_blocks_is_3_1_155:

	; r12 < 4: 3, 2, otherwise fall through.
	cmp	r12,3
	je	NEAR $L$_small_initial_num_blocks_is_3_155
	cmp	r12,2
	je	NEAR $L$_small_initial_num_blocks_is_2_155





; ---------------------------------------------------------------------
; $L$_small_initial_num_blocks_is_1_155
; Short-message handler for a single block of r13 bytes (the block may be
; shorter than 16 bytes). Roles of registers set up before this label are
; inferred from use -- confirm against the function prologue:
;   xmm2     counter block (updated here)
;   rcx      round-key table, offsets 0..160
;   r9+r11   input address; [120+rbp]+r11 output address
;   rdx      context: hash-key entry read at 336, 16 bytes stored at 16
;   r8       receives the partial-block length (0 for a complete block)
;   xmm14    running hash value (updated on exit)
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_1_155:
	vmovdqa64	xmm29,XMMWORD[SHUF_MASK]
	; Counter block for this block = xmm2 + ONE.
	vpaddd	xmm0,xmm2,XMMWORD[ONE]
	; k1 = byte64_len_to_mask_table[r13]: byte mask for the input load and
	; the output store.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	kmovq	k1,[r15*8+r10]
	; Keep the incremented counter in xmm2, then shuffle the working copy.
	vextracti32x4	xmm2,zmm0,0
	vpshufb	xmm0,xmm0,xmm29
	vmovdqu8	xmm6{k1}{z},[r11*1+r9]
	; Round 0 XOR, nine vaesenc rounds, vaesenclast with the key at 160.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	xmm0,xmm0,xmm15
	; XOR with the input; xmm12 keeps the unmasked output block.
	vpxorq	xmm0,xmm0,xmm6
	vextracti32x4	xmm12,zmm0,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes outside k1, then shuffle for hashing (xmm6, copy in
	; xmm13).
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm6,xmm0,xmm29
	vextracti32x4	xmm13,zmm6,0


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_156





	; --- complete block: hash = reduce((hash XOR block) * key at 336) ---
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm6,xmm20,0x01
	vpclmulqdq	xmm5,xmm6,xmm20,0x10
	vpclmulqdq	xmm0,xmm6,xmm20,0x11
	vpclmulqdq	xmm3,xmm6,xmm20,0x00

	; Generic fold sequence; the upper lanes are zero after the xmm-width
	; multiplies, so the lane XORs leave the low lane unchanged.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce xmm0:xmm3 with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_156
$L$_small_initial_partial_block_156:








	; --- short block: record its length and output block; no multiply ---
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12











	; XOR the shuffled short block into the hash value.
	vpxorq	xmm14,xmm14,xmm13

	jmp	NEAR $L$_after_reduction_156
$L$_small_initial_compute_done_156:
$L$_after_reduction_156:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; $L$_small_initial_num_blocks_is_2_155
; Short-message handler for two blocks (r13 bytes in total; the second
; block may be shorter than 16 bytes). Works on ymm-width vectors.
; Register roles as in the other small-message handlers (inferred from
; use): xmm2 counter, rcx round keys, r9+r11 input, [120+rbp]+r11
; output, rdx context, r8 partial-length slot, xmm14 hash value.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_2_155:
	vmovdqa64	ymm29,YMMWORD[SHUF_MASK]
	; Copy the counter to both lanes and add the first 32 bytes of
	; ddq_add_1234.
	vshufi64x2	ymm0,ymm2,ymm2,0
	vpaddd	ymm0,ymm0,YMMWORD[ddq_add_1234]
	; k1 = byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	kmovq	k1,[r15*8+r10]
	; New counter = lane 1 (the second counter block).
	vextracti32x4	xmm2,zmm0,1
	vpshufb	ymm0,ymm0,ymm29
	vmovdqu8	ymm6{k1}{z},[r11*1+r9]
	; Round 0 XOR, nine vaesenc rounds, vaesenclast with the key at 160.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	ymm0,ymm0,ymm15
	; XOR with the input; xmm12 keeps the unmasked second output block.
	vpxorq	ymm0,ymm0,ymm6
	vextracti32x4	xmm12,zmm0,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; Zero the bytes outside k1, shuffle for hashing; xmm13 = shuffled
	; second block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm6,ymm0,ymm29
	vextracti32x4	xmm13,zmm6,1
	; r13 = bytes belonging to the second block.
	sub	r13,16 * (2 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_157





	; --- second block complete: hash both blocks -------------------------
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	; Two hash-key entries at 320 and 336.
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm6,ymm20,0x01
	vpclmulqdq	ymm5,ymm6,ymm20,0x10
	vpclmulqdq	ymm0,ymm6,ymm20,0x11
	vpclmulqdq	ymm3,ymm6,ymm20,0x00

	; Fold the cross products into the high/low halves and XOR the lanes
	; down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce with POLY2; result in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_157
$L$_small_initial_partial_block_157:








	; --- second block short: record it, hash only the first block --------
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	; Single hash-key entry at 336; only the low lane is multiplied.
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm6,xmm20,0x01
	vpclmulqdq	xmm5,xmm6,xmm20,0x10
	vpclmulqdq	xmm0,xmm6,xmm20,0x11
	vpclmulqdq	xmm3,xmm6,xmm20,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_157:

	; If bytes of a short block remain (r13 != 0), XOR that shuffled block
	; (xmm13) into the hash value without multiplying it here.
	or	r13,r13
	je	NEAR $L$_after_reduction_157
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_157:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; $L$_small_initial_num_blocks_is_3_155
; Short-message handler for three blocks (r13 bytes in total; the third
; block may be shorter than 16 bytes). Uses one zmm vector; lane 3 is
; computed but removed by the k1 mask.
; Register roles as in the other small-message handlers (inferred from
; use): zmm2 counter, rcx round keys, r9+r11 input, [120+rbp]+r11
; output, rdx context, r8 partial-length slot, xmm14 hash value.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_3_155:
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	; Copy the counter to all lanes and add ddq_add_1234.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	; k1 = byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	kmovq	k1,[r15*8+r10]
	; New counter = lane 2 (the third counter block).
	vextracti32x4	xmm2,zmm0,2
	vpshufb	zmm0,zmm0,zmm29
	vmovdqu8	zmm6{k1}{z},[r11*1+r9]
	; Round 0 XOR, nine vaesenc rounds, vaesenclast with the key at 160.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	; XOR with the input; xmm12 keeps the unmasked third output block.
	vpxorq	zmm0,zmm0,zmm6
	vextracti32x4	xmm12,zmm0,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes outside k1, shuffle for hashing; xmm13 = shuffled third
	; block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm6,zmm0,zmm29
	vextracti32x4	xmm13,zmm6,2
	; r13 = bytes belonging to the third block.
	sub	r13,16 * (3 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_158





	; --- third block complete: hash all three blocks ----------------------
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	; Three hash-key entries at 304, 320, 336 (32-byte load plus a 16-byte
	; insert into lane 2, so nothing beyond offset 351 is read).
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00

	; Fold the cross products into the high/low halves and XOR the lanes
	; down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce with POLY2; result in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_158
$L$_small_initial_partial_block_158:








	; --- third block short: record it, hash only the first two blocks -----
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	; Two hash-key entries at 320 and 336; only the two low lanes are
	; multiplied.
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm6,ymm20,0x01
	vpclmulqdq	ymm5,ymm6,ymm20,0x10
	vpclmulqdq	ymm0,ymm6,ymm20,0x11
	vpclmulqdq	ymm3,ymm6,ymm20,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_158:

	; If bytes of a short block remain (r13 != 0), XOR that shuffled block
	; (xmm13) into the hash value without multiplying it here.
	or	r13,r13
	je	NEAR $L$_after_reduction_158
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_158:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; $L$_small_initial_num_blocks_is_4_155
; Short-message handler for four blocks (r13 bytes in total; the fourth
; block may be shorter than 16 bytes). Uses one full zmm vector.
; Register roles as in the other small-message handlers (inferred from
; use): zmm2 counter, rcx round keys, r9+r11 input, [120+rbp]+r11
; output, rdx context, r8 partial-length slot, xmm14 hash value.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_4_155:
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	; Copy the counter to all lanes and add ddq_add_1234.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	; k1 = byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	kmovq	k1,[r15*8+r10]
	; New counter = lane 3 (the fourth counter block).
	vextracti32x4	xmm2,zmm0,3
	vpshufb	zmm0,zmm0,zmm29
	vmovdqu8	zmm6{k1}{z},[r11*1+r9]
	; Round 0 XOR, nine vaesenc rounds, vaesenclast with the key at 160.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	; XOR with the input; xmm12 keeps the unmasked fourth output block.
	vpxorq	zmm0,zmm0,zmm6
	vextracti32x4	xmm12,zmm0,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; Zero the bytes outside k1, shuffle for hashing; xmm13 = shuffled fourth
	; block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm6,zmm0,zmm29
	vextracti32x4	xmm13,zmm6,3
	; r13 = bytes belonging to the fourth block.
	sub	r13,16 * (4 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_159





	; --- fourth block complete: hash all four blocks ----------------------
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	; Four hash-key entries at 288..351.
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10

	; Fold the cross products into the high (zmm0) / low (zmm3) halves and
	; XOR the lanes down to one.
	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce with POLY2; result in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_159
$L$_small_initial_partial_block_159:








	; --- fourth block short: record it, hash only the first three blocks --
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	; Three hash-key entries at 304, 320, 336; lane 3 of zmm20 is zero, so
	; the short block in lane 3 of zmm6 contributes nothing here.
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_159:

	; If bytes of a short block remain (r13 != 0), XOR that shuffled block
	; (xmm13) into the hash value without multiplying it here.
	or	r13,r13
	je	NEAR $L$_after_reduction_159
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_159:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; $L$_small_initial_num_blocks_is_5_155
; Short-message handler for five blocks (r13 bytes in total; the fifth
; block may be shorter than 16 bytes). Blocks 1-4 are processed in zmm0,
; block 5 in xmm3.
; Register roles as in the other small-message handlers (inferred from
; use): zmm2 counter, rcx round keys, r9+r11 input, [120+rbp]+r11
; output, rdx context, r8 partial-length slot, xmm14 hash value.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_5_155:
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	; Copy the counter to all lanes; add ddq_add_1234 for the first vector
	; and ddq_add_5678 for the second.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask for the second
	; vector only; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,64
	kmovq	k1,[r15*8+r10]
	; New counter = lane 0 of the second vector (the fifth counter block).
	vextracti32x4	xmm2,zmm3,0
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm7{k1}{z},[64+r11*1+r9]
	; Round 0 XOR, nine vaesenc rounds, vaesenclast with the key at 160,
	; applied to both vectors with the same broadcast round key.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	xmm3,xmm3,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	xmm3,xmm3,xmm15
	; XOR with the input; xmm12 keeps the unmasked fifth output block.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	xmm3,xmm3,xmm7
	vextracti32x4	xmm12,zmm3,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; Zero the bytes of the second vector outside k1, shuffle both vectors
	; for hashing; xmm13 = shuffled fifth block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	xmm7,xmm3,xmm29
	vextracti32x4	xmm13,zmm7,0
	; r13 = bytes belonging to the fifth block.
	sub	r13,16 * (5 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_160





	; --- fifth block complete: hash all five blocks -----------------------
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	; Blocks 1-4 use the four hash-key entries at 272..335.
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
	; Block 5 uses the entry at 336.
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm7,xmm20,0x01
	vpclmulqdq	xmm5,xmm7,xmm20,0x10
	vpclmulqdq	xmm0,xmm7,xmm20,0x11
	vpclmulqdq	xmm3,xmm7,xmm20,0x00

	; Sum the products of the two vectors.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	; Fold the cross products into the high/low halves and XOR the lanes
	; down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce with POLY2; result in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_160
$L$_small_initial_partial_block_160:








	; --- fifth block short: record it, hash only the first four blocks ----
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	; Four hash-key entries at 288..351 (one slot later than the full path).
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10

	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_160:

	; If bytes of a short block remain (r13 != 0), XOR that shuffled block
	; (xmm13) into the hash value without multiplying it here.
	or	r13,r13
	je	NEAR $L$_after_reduction_160
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_160:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_6_155:
; Case arm for 6 initial 16-byte blocks (the 6th may be partial): produce 6
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4 and +5..+8) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
; k1 = byte mask for the second 64-byte vector, looked up with index r13-64.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,64
	kmovq	k1,[r15*8+r10]
; Keep lane 1 of zmm3 (the 6th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm3,1
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
; Load 64 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm7{k1}{z},[64+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	ymm3,ymm3,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	ymm3,ymm3,ymm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	ymm3,ymm3,ymm7
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm3,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm3{k1}{z},zmm3
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	ymm7,ymm3,ymm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm7,1
; r13 = number of bytes in the last (6th) block.
	sub	r13,16 * (6 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_161





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the four table entries at [rdx+256..319] and blocks
; 5-6 by the two entries at [rdx+320..351]; each multiply yields four
; 64x64 partial products (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm7,ymm20,0x01
	vpclmulqdq	ymm5,ymm7,ymm20,0x10
	vpclmulqdq	ymm0,ymm7,ymm20,0x11
	vpclmulqdq	ymm3,ymm7,ymm20,0x00

; The ymm-width results above have zeroed upper lanes, so these zmm-wide
; XORs simply merge both sets of partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_161
$L$_small_initial_partial_block_161:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 5 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+272..335], block 5 the entry at
; [rdx+336]; lanes of zmm7 above the first are not multiplied.
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm7,xmm20,0x01
	vpclmulqdq	xmm5,xmm7,xmm20,0x10
	vpclmulqdq	xmm0,xmm7,xmm20,0x11
	vpclmulqdq	xmm3,xmm7,xmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_161:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_161
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_161:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_7_155:
; Case arm for 7 initial 16-byte blocks (the 7th may be partial): produce 7
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4 and +5..+8) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
; k1 = byte mask for the second 64-byte vector, looked up with index r13-64.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,64
	kmovq	k1,[r15*8+r10]
; Keep lane 2 of zmm3 (the 7th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm3,2
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
; Load 64 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7{k1}{z},[64+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm3,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm3{k1}{z},zmm3
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm7,2
; r13 = number of bytes in the last (7th) block.
	sub	r13,16 * (7 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_162





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the four table entries at [rdx+240..303]; each
; multiply yields four 64x64 partial products (hi*hi, lo*lo, two cross terms).
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
; Build the three table entries for blocks 5-7: [rdx+304..335] in lanes 0-1
; and [rdx+336] inserted into lane 2 (lane 3 stays zero from the ymm load).
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm7,zmm20,0x01
	vpclmulqdq	zmm5,zmm7,zmm20,0x10
	vpclmulqdq	zmm0,zmm7,zmm20,0x11
	vpclmulqdq	zmm3,zmm7,zmm20,0x00

; Merge both sets of partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_162
$L$_small_initial_partial_block_162:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 6 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+256..319], blocks 5-6 the entries
; at [rdx+320..351]; the 7th lane of zmm7 is not multiplied.
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm7,ymm20,0x01
	vpclmulqdq	ymm5,ymm7,ymm20,0x10
	vpclmulqdq	ymm0,ymm7,ymm20,0x11
	vpclmulqdq	ymm3,ymm7,ymm20,0x00

; The ymm-width results above have zeroed upper lanes, so these zmm-wide
; XORs simply merge both sets of partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_162:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_162
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_162:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_8_155:
; Case arm for 8 initial 16-byte blocks (the 8th may be partial): produce 8
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4 and +5..+8) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
; k1 = byte mask for the second 64-byte vector, looked up with index r13-64.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,64
	kmovq	k1,[r15*8+r10]
; Keep lane 3 of zmm3 (the 8th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm3,3
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
; Load 64 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7{k1}{z},[64+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm3,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm3{k1}{z},zmm3
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm7,3
; r13 = number of bytes in the last (8th) block.
	sub	r13,16 * (8 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_163





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the table entries at [rdx+224..287] and blocks 5-8
; by the entries at [rdx+288..351]; each multiply yields four 64x64 partial
; products (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
; Merge both sets of partial products into zmm15/16/17/19.
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_163
$L$_small_initial_partial_block_163:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 7 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+240..303].
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm15,zmm6,zmm20,0x11
	vpclmulqdq	zmm16,zmm6,zmm20,0x00
	vpclmulqdq	zmm17,zmm6,zmm20,0x01
	vpclmulqdq	zmm19,zmm6,zmm20,0x10
; Blocks 5-7 use [rdx+304..335] in lanes 0-1 and [rdx+336] in lane 2; lane 3
; of zmm20 is zero, so the 8th lane of zmm7 contributes nothing.
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm7,zmm20,0x01
	vpclmulqdq	zmm5,zmm7,zmm20,0x10
	vpclmulqdq	zmm0,zmm7,zmm20,0x11
	vpclmulqdq	zmm3,zmm7,zmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_163:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_163
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_163:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_9_155:
; Case arm for 9 initial 16-byte blocks (the 9th may be partial): produce 9
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4, +5..+8, and +8 on top of zmm0 for the third
; vector) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
; k1 = byte mask for the third 64-byte vector, looked up with index r13-128.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,128
	kmovq	k1,[r15*8+r10]
; Keep lane 0 of zmm4 (the 9th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm4,0
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
; Load 128 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm10{k1}{z},[128+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	xmm4,xmm4,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	xmm4,xmm4,xmm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	xmm4,xmm4,xmm10
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm4,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm4{k1}{z},zmm4
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	xmm10,xmm4,xmm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm10,0
; r13 = number of bytes in the last (9th) block.
	sub	r13,16 * (9 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_164





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the table entries at [rdx+208..271], blocks 5-8 by
; [rdx+272..335] and block 9 by [rdx+336]; each multiply yields four 64x64
; partial products (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm10,xmm20,0x01
	vpclmulqdq	xmm5,xmm10,xmm20,0x10
	vpclmulqdq	xmm0,xmm10,xmm20,0x11
	vpclmulqdq	xmm3,xmm10,xmm20,0x00

; The xmm-width results above have zeroed upper lanes, so these zmm-wide
; XORs simply merge them with the accumulated partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_164
$L$_small_initial_partial_block_164:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 8 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+224..287], blocks 5-8 the entries
; at [rdx+288..351]; zmm10 (the 9th block) is not multiplied.
	vmovdqu64	zmm20,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_164:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_164
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_164:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_10_155:
; Case arm for 10 initial 16-byte blocks (the 10th may be partial): produce 10
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4, +5..+8, and +8 on top of zmm0 for the third
; vector) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
; k1 = byte mask for the third 64-byte vector, looked up with index r13-128.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,128
	kmovq	k1,[r15*8+r10]
; Keep lane 1 of zmm4 (the 10th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm4,1
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
; Load 128 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm10{k1}{z},[128+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	ymm4,ymm4,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	ymm4,ymm4,ymm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	ymm4,ymm4,ymm10
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm4,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm4{k1}{z},zmm4
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	ymm10,ymm4,ymm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm10,1
; r13 = number of bytes in the last (10th) block.
	sub	r13,16 * (10 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_165





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the table entries at [rdx+192..255], blocks 5-8 by
; [rdx+256..319] and blocks 9-10 by [rdx+320..351]; each multiply yields four
; 64x64 partial products (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm10,ymm20,0x01
	vpclmulqdq	ymm5,ymm10,ymm20,0x10
	vpclmulqdq	ymm0,ymm10,ymm20,0x11
	vpclmulqdq	ymm3,ymm10,ymm20,0x00

; The ymm-width results above have zeroed upper lanes, so these zmm-wide
; XORs simply merge them with the accumulated partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_165
$L$_small_initial_partial_block_165:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 9 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+208..271], blocks 5-8 the entries
; at [rdx+272..335] and block 9 the entry at [rdx+336]; the 10th block in
; lane 1 of zmm10 is not multiplied.
	vmovdqu64	zmm20,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm10,xmm20,0x01
	vpclmulqdq	xmm5,xmm10,xmm20,0x10
	vpclmulqdq	xmm0,xmm10,xmm20,0x11
	vpclmulqdq	xmm3,xmm10,xmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_165:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_165
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_165:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_11_155:
; Case arm for 11 initial 16-byte blocks (the 11th may be partial): produce 11
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4, +5..+8, and +8 on top of zmm0 for the third
; vector) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
; k1 = byte mask for the third 64-byte vector, looked up with index r13-128.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,128
	kmovq	k1,[r15*8+r10]
; Keep lane 2 of zmm4 (the 11th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm4,2
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
; Load 128 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10{k1}{z},[128+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm4,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm4{k1}{z},zmm4
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm10,2
; r13 = number of bytes in the last (11th) block.
	sub	r13,16 * (11 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_166





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the table entries at [rdx+176..239] and blocks 5-8
; by [rdx+240..303]; each multiply yields four 64x64 partial products
; (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
; Build the three table entries for blocks 9-11: [rdx+304..335] in lanes 0-1
; and [rdx+336] inserted into lane 2 (lane 3 stays zero from the ymm load).
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm10,zmm20,0x01
	vpclmulqdq	zmm5,zmm10,zmm20,0x10
	vpclmulqdq	zmm0,zmm10,zmm20,0x11
	vpclmulqdq	zmm3,zmm10,zmm20,0x00

; Merge with the accumulated partial products.
	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_166
$L$_small_initial_partial_block_166:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 10 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+192..255], blocks 5-8 the entries
; at [rdx+256..319] and blocks 9-10 the entries at [rdx+320..351]; the 11th
; block in lane 2 of zmm10 is not multiplied.
	vmovdqu64	zmm20,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm10,ymm20,0x01
	vpclmulqdq	ymm5,ymm10,ymm20,0x10
	vpclmulqdq	ymm0,ymm10,ymm20,0x11
	vpclmulqdq	ymm3,ymm10,ymm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_166:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_166
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_166:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
$L$_small_initial_num_blocks_is_12_155:
; Case arm for 12 initial 16-byte blocks (the 12th may be partial): produce 12
; AES-CTR output blocks and fold them into the hash value kept in xmm14.
; Register roles as used below (items marked "presumably" are inferred from
; names only and should be confirmed against the caller):
;   rcx     base of the AES round keys (read at offsets 0..160)
;   rdx     base of the table read by the carry-less multiplies
;           (presumably precomputed hash-key powers)
;   r9+r11  input pointer + offset;  [rbp+120] holds the output pointer
;   r13     number of bytes handled by this arm
;   r8      receives the leftover byte count of the last block
;   zmm2    counter block;  zmm14  hash value on entry (presumably)
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
; Copy the low 128-bit lane of zmm2 to all four lanes, then add per-lane
; constants (presumably +1..+4, +5..+8, and +8 on top of zmm0 for the third
; vector) to form the counter blocks.
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
; k1 = byte mask for the third 64-byte vector, looked up with index r13-128.
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,128
	kmovq	k1,[r15*8+r10]
; Keep lane 3 of zmm4 (the 12th counter block) in xmm2 for later use.
	vextracti32x4	xmm2,zmm4,3
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
; Load 128 full input bytes plus a masked (zero-filled) remainder.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10{k1}{z},[128+r11*1+r9]
; AES on the counter blocks: each round key is broadcast to every 128-bit
; lane. Key 0 is XORed in, keys at 16..144 use vaesenc and the key at 160
; uses vaesenclast - the same 10-round sequence as $L$aes_128_0 in this file.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
; XOR the keystream with the input to get the output blocks.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
; xmm12 = unmasked copy of the last output block (stored on the partial path).
	vextracti32x4	xmm12,zmm4,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
; Zero the bytes beyond the mask so they do not enter the hash.
	vmovdqu8	zmm4{k1}{z},zmm4
; Byte-shuffle the output blocks with SHUF_MASK for the multiplications.
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
; xmm13 = shuffled last block, used after the reduction if it was partial.
	vextracti32x4	xmm13,zmm10,3
; r13 = number of bytes in the last (12th) block.
	sub	r13,16 * (12 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_167





; Full last block: r13 becomes 0 and 0 is recorded at [r8].
	sub	r13,16
	mov	QWORD[r8],0
; Fold the incoming hash value into the first four blocks.
	vpxorq	zmm6,zmm6,zmm14
; Multiply blocks 1-4 by the table entries at [rdx+160..223], blocks 5-8 by
; [rdx+224..287] and blocks 9-12 by [rdx+288..351]; each multiply yields four
; 64x64 partial products (hi*hi, lo*lo and the two cross terms).
	vmovdqu64	zmm20,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[224+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
; zmm6/zmm7 are reused as temporaries here; vpternlogq with 0x96 is a
; three-way XOR, accumulating all three groups into zmm15/16/17/19.
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96

; Combine the cross terms, split them across the high (zmm0) and low (zmm3)
; halves, then XOR the four lanes together down to one 128-bit value each.
	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
; Reduce the 256-bit product xmm0:xmm3 to 128 bits with the POLY2 constant;
; the result is left in xmm14.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_167
$L$_small_initial_partial_block_167:
; Partial last block: record its byte count at [r8] and save the last output
; block (xmm12) at [rdx+16]; only the 11 complete blocks are multiplied here.








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
; Blocks 1-4 use the table entries at [rdx+176..239] and blocks 5-8 the
; entries at [rdx+240..303].
	vmovdqu64	zmm20,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vpxorq	zmm15,zmm0,zmm15
	vpxorq	zmm16,zmm3,zmm16
	vpxorq	zmm17,zmm4,zmm17
	vpxorq	zmm19,zmm5,zmm19
; Blocks 9-11 use [rdx+304..335] in lanes 0-1 and [rdx+336] in lane 2; lane 3
; of zmm20 is zero, so the 12th lane of zmm10 contributes nothing.
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm10,zmm20,0x01
	vpclmulqdq	zmm5,zmm10,zmm20,0x10
	vpclmulqdq	zmm0,zmm10,zmm20,0x11
	vpclmulqdq	zmm3,zmm10,zmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

; Same cross-term merge, lane fold and POLY2 reduction as the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_167:
; If r13 is non-zero (leftover bytes in the last block), XOR the shuffled
; last block into xmm14 without multiplying it; skipped when r13 == 0.

	or	r13,r13
	je	NEAR $L$_after_reduction_167
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_167:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; Small-message path processing 13 sixteen-byte lanes (3 full zmm
; registers plus one xmm), using 11 AES round keys (rcx+0 .. rcx+160).
; Registers as used below:
;   zmm2  - block whose low 128-bit lane seeds the 13 counter values
;   r13   - byte count handled here (reduced as blocks are consumed)
;   r9    - source buffer, r11 - byte offset into source and destination
;   [rbp+120] - destination pointer
;   rcx   - AES round keys, rdx - context holding the multiplier table
;   r8    - points at the stored partial-block byte count
;   xmm14 - running hash value (input and output)
; NOTE(review): the rdx+144 .. rdx+336 entries look like precomputed
; hash-key powers; confirm against the context layout.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_13_155:
	; Build counters: broadcast lane 0 of zmm2, then add per-lane increments.
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
	vpaddd	zmm5,zmm3,ZMMWORD[ddq_add_8888]
	; k1 = byte mask for the bytes beyond the first 192 (table indexed by r13-192).
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,192
	kmovq	k1,[r15*8+r10]
	; Keep the last counter value used (lane 0 of zmm5) in xmm2.
	vextracti32x4	xmm2,zmm5,0
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
	; Load 192 full source bytes plus a masked, zero-filled final block.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm11{k1}{z},[192+r11*1+r9]
	; AES: initial key XOR, nine vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vpxorq	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	xmm5,xmm5,xmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
	vaesenclast	xmm5,xmm5,xmm15
	; XOR the cipher output with the source data.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
	vpxorq	xmm5,xmm5,xmm11
	; xmm12 = unmasked last output block; only the partial path stores it.
	vextracti32x4	xmm12,zmm5,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes outside k1 before the result is hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
	vpshufb	xmm11,xmm5,xmm29
	; xmm13 = byte-shuffled last block, XORed into xmm14 later when r13 != 0.
	vextracti32x4	xmm13,zmm11,0
	; r13 = bytes belonging to the 13th block.
	sub	r13,16 * (13 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_168





	; Last block is full: clear the stored partial count and hash all 13 blocks.
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[208+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm11,xmm20,0x01
	vpclmulqdq	xmm5,xmm11,xmm20,0x10
	vpclmulqdq	xmm0,xmm11,xmm20,0x11
	vpclmulqdq	xmm3,xmm11,xmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	; Fold the middle products into the high (zmm0) and low (zmm3) halves,
	; then XOR the four 128-bit lanes of each down to a single xmm.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce the 256-bit product to 128 bits with the POLY2 constant.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_168
$L$_small_initial_partial_block_168:








	; Last block is short: record its byte count and save the unmasked output
	; block at rdx+16, then hash only the 12 preceding full blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[224+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[288+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96

	vpxorq	zmm17,zmm17,zmm19
	vpsrldq	zmm4,zmm17,8
	vpslldq	zmm5,zmm17,8
	vpxorq	zmm0,zmm15,zmm4
	vpxorq	zmm3,zmm16,zmm5
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_168:

	; When bytes of a short block remain (r13 != 0), XOR that block into the
	; hash without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_168
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_168:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; Small-message path processing 14 sixteen-byte lanes (3 full zmm
; registers plus one ymm), using 11 AES round keys (rcx+0 .. rcx+160).
; Register roles match the other small-block cases: zmm2 seeds the
; counters, r13 is the byte count, r9+r11 is the source, [rbp+120]+r11
; the destination, rdx the multiplier-table context, r8 the stored
; partial-block count, xmm14 the running hash.
; NOTE(review): the rdx+128 .. rdx+336 entries look like precomputed
; hash-key powers; confirm against the context layout.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_14_155:
	; Build counters: broadcast lane 0 of zmm2, then add per-lane increments.
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
	vpaddd	zmm5,zmm3,ZMMWORD[ddq_add_8888]
	; k1 = byte mask for the bytes beyond the first 192 (table indexed by r13-192).
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,192
	kmovq	k1,[r15*8+r10]
	; Keep the last counter value used (lane 1 of zmm5) in xmm2.
	vextracti32x4	xmm2,zmm5,1
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
	; Load 192 full source bytes plus a masked, zero-filled tail (up to 32 bytes).
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm11{k1}{z},[192+r11*1+r9]
	; AES: initial key XOR, nine vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vpxorq	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	ymm5,ymm5,ymm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
	vaesenclast	ymm5,ymm5,ymm15
	; XOR the cipher output with the source data.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
	vpxorq	ymm5,ymm5,ymm11
	; xmm12 = unmasked last output block; only the partial path stores it.
	vextracti32x4	xmm12,zmm5,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes outside k1 before the result is hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
	vpshufb	ymm11,ymm5,ymm29
	; xmm13 = byte-shuffled last block, XORed into xmm14 later when r13 != 0.
	vextracti32x4	xmm13,zmm11,1
	; r13 = bytes belonging to the 14th block.
	sub	r13,16 * (14 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_169





	; Last block is full: clear the stored partial count and hash all 14 blocks.
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[192+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm11,ymm20,0x01
	vpclmulqdq	ymm5,ymm11,ymm20,0x10
	vpclmulqdq	ymm0,ymm11,ymm20,0x11
	vpclmulqdq	ymm3,ymm11,ymm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	; Fold the middle products into the high (zmm0) and low (zmm3) halves,
	; then XOR the four 128-bit lanes of each down to a single xmm.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce the 256-bit product to 128 bits with the POLY2 constant.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_169
$L$_small_initial_partial_block_169:








	; Last block is short: record its byte count and save the unmasked output
	; block at rdx+16, then hash only the 13 preceding full blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[208+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[272+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	vmovdqu64	xmm20,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm11,xmm20,0x01
	vpclmulqdq	xmm5,xmm11,xmm20,0x10
	vpclmulqdq	xmm0,xmm11,xmm20,0x11
	vpclmulqdq	xmm3,xmm11,xmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_169:

	; When bytes of a short block remain (r13 != 0), XOR that block into the
	; hash without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_169
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_169:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; Small-message path processing 15 sixteen-byte lanes. Four zmm
; registers are run through AES; the k1 mask limits how much of the
; fourth is loaded, stored and hashed. Uses 11 AES round keys
; (rcx+0 .. rcx+160). Register roles match the other small-block cases:
; zmm2 seeds the counters, r13 is the byte count, r9+r11 is the source,
; [rbp+120]+r11 the destination, rdx the multiplier-table context, r8
; the stored partial-block count, xmm14 the running hash.
; NOTE(review): the rdx+112 .. rdx+336 entries look like precomputed
; hash-key powers; confirm against the context layout.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_15_155:
	; Build counters: broadcast lane 0 of zmm2, then add per-lane increments.
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
	vpaddd	zmm5,zmm3,ZMMWORD[ddq_add_8888]
	; k1 = byte mask for the bytes beyond the first 192 (table indexed by r13-192).
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,192
	kmovq	k1,[r15*8+r10]
	; Keep the last counter value used (lane 2 of zmm5) in xmm2.
	vextracti32x4	xmm2,zmm5,2
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	; Load 192 full source bytes plus a masked, zero-filled tail.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm11{k1}{z},[192+r11*1+r9]
	; AES: initial key XOR, nine vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vpxorq	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
	vaesenclast	zmm5,zmm5,zmm15
	; XOR the cipher output with the source data.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
	vpxorq	zmm5,zmm5,zmm11
	; xmm12 = unmasked last output block; only the partial path stores it.
	vextracti32x4	xmm12,zmm5,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes outside k1 before the result is hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
	vpshufb	zmm11,zmm5,zmm29
	; xmm13 = byte-shuffled last block, XORed into xmm14 later when r13 != 0.
	vextracti32x4	xmm13,zmm11,2
	; r13 = bytes belonging to the 15th block.
	sub	r13,16 * (15 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_170





	; Last block is full: clear the stored partial count and hash all 15 blocks.
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[176+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	; Assemble a three-entry multiplier: 32 bytes at rdx+304 plus 16 at rdx+336.
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm11,zmm20,0x01
	vpclmulqdq	zmm5,zmm11,zmm20,0x10
	vpclmulqdq	zmm0,zmm11,zmm20,0x11
	vpclmulqdq	zmm3,zmm11,zmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	; Fold the middle products into the high (zmm0) and low (zmm3) halves,
	; then XOR the four 128-bit lanes of each down to a single xmm.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce the 256-bit product to 128 bits with the POLY2 constant.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_170
$L$_small_initial_partial_block_170:








	; Last block is short: record its byte count and save the unmasked output
	; block at rdx+16, then hash only the 14 preceding full blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[192+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[256+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	vmovdqu64	ymm20,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm11,ymm20,0x01
	vpclmulqdq	ymm5,ymm11,ymm20,0x10
	vpclmulqdq	ymm0,ymm11,ymm20,0x11
	vpclmulqdq	ymm3,ymm11,ymm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_170:

	; When bytes of a short block remain (r13 != 0), XOR that block into the
	; hash without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_170
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_170:
	jmp	NEAR $L$_small_initial_blocks_encrypted_155
; ---------------------------------------------------------------------
; Small-message path processing 16 sixteen-byte lanes (four zmm
; registers), using 11 AES round keys (rcx+0 .. rcx+160). Unlike the
; 12..15 cases there is no full/partial branch: the 16th block is always
; recorded through [r8] and rdx+16, only the first 15 blocks are
; multiplied, and xmm13 is always XORed into the hash afterwards.
; The shared exit at the end stores xmm2 to [rdx] and the hash (xmm14)
; to [rdx+64] before jumping to $L$exit_gcm_encrypt.
; NOTE(review): the rdx+112 .. rdx+336 entries look like precomputed
; hash-key powers; confirm against the context layout.
; ---------------------------------------------------------------------
$L$_small_initial_num_blocks_is_16_155:
	; Build counters: broadcast lane 0 of zmm2, then add per-lane increments.
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vshufi64x2	zmm2,zmm2,zmm2,0
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm2,ZMMWORD[ddq_add_5678]
	vpaddd	zmm4,zmm0,ZMMWORD[ddq_add_8888]
	vpaddd	zmm5,zmm3,ZMMWORD[ddq_add_8888]
	; k1 = byte mask for the bytes beyond the first 192 (table indexed by r13-192).
	lea	r10,[byte64_len_to_mask_table]
	mov	r15,r13
	sub	r15,192
	kmovq	k1,[r15*8+r10]
	; Keep the last counter value used (lane 3 of zmm5) in xmm2.
	vextracti32x4	xmm2,zmm5,3
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	; Load 192 full source bytes plus a masked, zero-filled tail.
	vmovdqu8	zmm6,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm7,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm10,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm11{k1}{z},[192+r11*1+r9]
	; AES: initial key XOR, nine vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm15,ZMMWORD[rcx]
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm15
	vpxorq	zmm4,zmm4,zmm15
	vpxorq	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[16+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[32+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[48+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[64+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[80+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[96+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[112+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[128+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm15
	vaesenc	zmm3,zmm3,zmm15
	vaesenc	zmm4,zmm4,zmm15
	vaesenc	zmm5,zmm5,zmm15
	vbroadcastf64x2	zmm15,ZMMWORD[160+rcx]
	vaesenclast	zmm0,zmm0,zmm15
	vaesenclast	zmm3,zmm3,zmm15
	vaesenclast	zmm4,zmm4,zmm15
	vaesenclast	zmm5,zmm5,zmm15
	; XOR the cipher output with the source data.
	vpxorq	zmm0,zmm0,zmm6
	vpxorq	zmm3,zmm3,zmm7
	vpxorq	zmm4,zmm4,zmm10
	vpxorq	zmm5,zmm5,zmm11
	; xmm12 = unmasked last output block, stored at rdx+16 below.
	vextracti32x4	xmm12,zmm5,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes outside k1 before the result is hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm6,zmm0,zmm29
	vpshufb	zmm7,zmm3,zmm29
	vpshufb	zmm10,zmm4,zmm29
	vpshufb	zmm11,zmm5,zmm29
	; xmm13 = byte-shuffled last block, XORed into xmm14 after the reduction.
	vextracti32x4	xmm13,zmm11,3
	; r13 = bytes belonging to the 16th block.
	sub	r13,16 * (16 - 1)
$L$_small_initial_partial_block_171:








	; Record the last block's byte count and save the unmasked output block,
	; then hash the 15 preceding blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm12
	vpxorq	zmm6,zmm6,zmm14
	vmovdqu64	zmm20,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm6,zmm20,0x11
	vpclmulqdq	zmm3,zmm6,zmm20,0x00
	vpclmulqdq	zmm4,zmm6,zmm20,0x01
	vpclmulqdq	zmm5,zmm6,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[176+rdx]
	vpclmulqdq	zmm15,zmm7,zmm20,0x11
	vpclmulqdq	zmm16,zmm7,zmm20,0x00
	vpclmulqdq	zmm17,zmm7,zmm20,0x01
	vpclmulqdq	zmm19,zmm7,zmm20,0x10
	vmovdqu64	zmm20,ZMMWORD[240+rdx]
	vpclmulqdq	zmm6,zmm10,zmm20,0x11
	vpclmulqdq	zmm7,zmm10,zmm20,0x00
	vpternlogq	zmm15,zmm6,zmm0,0x96
	vpternlogq	zmm16,zmm7,zmm3,0x96
	vpclmulqdq	zmm6,zmm10,zmm20,0x01
	vpclmulqdq	zmm7,zmm10,zmm20,0x10
	vpternlogq	zmm17,zmm6,zmm4,0x96
	vpternlogq	zmm19,zmm7,zmm5,0x96
	; Assemble a three-entry multiplier: 32 bytes at rdx+304 plus 16 at rdx+336.
	vmovdqu64	ymm20,YMMWORD[304+rdx]
	vinserti64x2	zmm20,zmm20,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm11,zmm20,0x01
	vpclmulqdq	zmm5,zmm11,zmm20,0x10
	vpclmulqdq	zmm0,zmm11,zmm20,0x11
	vpclmulqdq	zmm3,zmm11,zmm20,0x00

	vpxorq	zmm4,zmm4,zmm17
	vpxorq	zmm5,zmm5,zmm19
	vpxorq	zmm0,zmm0,zmm15
	vpxorq	zmm3,zmm3,zmm16

	; Fold the middle products into the high (zmm0) and low (zmm3) halves,
	; then XOR the four 128-bit lanes of each down to a single xmm.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm17,zmm4,8
	vpslldq	zmm19,zmm4,8
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vextracti64x4	ymm17,zmm0,1
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm17,ymm0,1
	vpxorq	xmm0,xmm0,xmm17
	vextracti64x4	ymm19,zmm3,1
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm19,ymm3,1
	vpxorq	xmm3,xmm3,xmm19
	; Reduce the 256-bit product to 128 bits with the POLY2 constant.
	vmovdqa64	xmm20,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm20,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm20,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm20,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_171:
	; Unconditionally XOR the 16th block into the hash (no r13 test here).
	vpxorq	xmm14,xmm14,xmm13
$L$_after_reduction_171:
$L$_small_initial_blocks_encrypted_155:
$L$_ghash_done_10:
	; Shared exit for the small-block cases: write back xmm2 and the hash.
	vmovdqu64	XMMWORD[rdx],xmm2
	vmovdqu64	XMMWORD[64+rdx],xmm14
$L$_enc_dec_done_10:
	jmp	NEAR $L$exit_gcm_encrypt
; ---------------------------------------------------------------------
; Entry of the variant that runs 13 AES round keys (rcx+0 .. rcx+192).
; Values read here:
;   [rbp+112] - byte count to process (nothing to do when zero)
;   [rbp+120] - destination pointer
;   r9        - source pointer
;   rdx       - context: +0 block loaded into xmm2, +16 saved block for
;               the unfinished partial block, +64 running hash,
;               +336 multiplier used for one hash step
;   r8        - pointer to the byte count of a partial block carried over
; First completes (or extends) a carried-over partial block, then
; dispatches on the remaining length in r13.
; NOTE(review): rdx+16 is presumably the encrypted counter block kept for
; the unfinished block; confirm against where it is written.
; ---------------------------------------------------------------------
ALIGN	32
$L$aes_gcm_encrypt_192_avx512:
	cmp	QWORD[112+rbp],0
	je	NEAR $L$_enc_dec_done_172
	xor	r14,r14
	vmovdqu64	xmm14,XMMWORD[64+rdx]

	; r11 = bytes already present in the carried-over partial block.
	mov	r11,QWORD[r8]
	or	r11,r11
	je	NEAR $L$_partial_block_done_173
	; r10 = min(16, byte count); k1 = load mask for that many bytes
	; (two-byte table entries, hence r10 is added twice).
	mov	r10d,16
	lea	r12,[byte_len_to_mask_table]
	cmp	QWORD[112+rbp],r10
	cmovc	r10,QWORD[112+rbp]
	add	r12,r10
	add	r12,r10
	kmovw	k1,[r12]
	vmovdqu8	xmm0{k1}{z},[r9]

	vmovdqu64	xmm3,XMMWORD[16+rdx]
	vmovdqu64	xmm4,XMMWORD[336+rdx]



	; Shuffle the saved block by the SHIFT_MASK entry at offset r11, then
	; XOR it with the new input bytes.
	lea	r12,[SHIFT_MASK]
	add	r12,r11
	vmovdqu64	xmm5,XMMWORD[r12]
	vpshufb	xmm3,xmm3,xmm5
	vpxorq	xmm3,xmm3,xmm0


	; r13 = byte count + r11 - 16; negative means the block is still not full,
	; in which case the mask pointer is moved by the shortfall.
	mov	r13,QWORD[112+rbp]
	add	r13,r11
	sub	r13,16
	jge	NEAR $L$_no_extra_mask_173
	sub	r12,r13
$L$_no_extra_mask_173:



	; Mask off unused bytes, byte-shuffle, and XOR the result into the hash.
	vmovdqu64	xmm0,XMMWORD[16+r12]
	vpand	xmm3,xmm3,xmm0
	vpshufb	xmm3,xmm3,XMMWORD[SHUF_MASK]
	vpshufb	xmm3,xmm3,xmm5
	vpxorq	xmm14,xmm14,xmm3
	cmp	r13,0
	jl	NEAR $L$_partial_incomplete_173

	; Block completed: multiply the hash by xmm4 (four carry-less products).
	vpclmulqdq	xmm7,xmm14,xmm4,0x11
	vpclmulqdq	xmm10,xmm14,xmm4,0x00
	vpclmulqdq	xmm11,xmm14,xmm4,0x01
	vpclmulqdq	xmm14,xmm14,xmm4,0x10
	vpxorq	xmm14,xmm14,xmm11

	vpsrldq	xmm11,xmm14,8
	vpslldq	xmm14,xmm14,8
	vpxorq	xmm7,xmm7,xmm11
	vpxorq	xmm14,xmm14,xmm10



	; Reduce the product to 128 bits with the POLY2 constant.
	vmovdqu64	xmm11,XMMWORD[POLY2]

	vpclmulqdq	xmm10,xmm11,xmm14,0x01
	vpslldq	xmm10,xmm10,8
	vpxorq	xmm14,xmm14,xmm10



	vpclmulqdq	xmm10,xmm11,xmm14,0x00
	vpsrldq	xmm10,xmm10,4
	vpclmulqdq	xmm14,xmm11,xmm14,0x10
	vpslldq	xmm14,xmm14,4

	vpternlogq	xmm14,xmm7,xmm10,0x96

	; No partial block remains; r11 = 16 - old r11 = bytes consumed here.
	mov	QWORD[r8],0

	mov	r12,r11
	mov	r11,16
	sub	r11,r12
	jmp	NEAR $L$_enc_dec_done_173

$L$_partial_incomplete_173:
	; Still short of a full block: add the byte count to the stored partial
	; count; r11 = bytes consumed here (the whole input).
	mov	r12,QWORD[112+rbp]
	add	QWORD[r8],r12
	mov	r11,QWORD[112+rbp]

$L$_enc_dec_done_173:


	; Store the updated hash and write r11 output bytes under mask k1.
	lea	r12,[byte_len_to_mask_table]
	kmovw	k1,[r11*2+r12]
	vmovdqu64	XMMWORD[64+rdx],xmm14

	vpshufb	xmm3,xmm3,XMMWORD[SHUF_MASK]
	vpshufb	xmm3,xmm3,xmm5
	mov	r12,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r12]{k1},xmm3
$L$_partial_block_done_173:
	; r13 = bytes left after the partial block; done if zero, and lengths of
	; at most 256 bytes take the 16-blocks-or-fewer path.
	vmovdqu64	xmm2,XMMWORD[rdx]
	mov	r13,QWORD[112+rbp]
	sub	r13,r11
	je	NEAR $L$_enc_dec_done_172
	cmp	r13,256
	jbe	NEAR $L$_message_below_equal_16_blocks_172

	; -----------------------------------------------------------------
	; First batch of 16 blocks (256 bytes) for messages longer than 256
	; bytes: build 16 counter values, run them through 13 AES round keys,
	; XOR with the source, store the output, and park the byte-shuffled
	; output at rsp+768 .. rsp+1023 for later hashing.
	; zmm29 = SHUF_MASK, zmm27/zmm28 = ddq_addbe_4444 / ddq_addbe_1234
	; increments, r15b = low byte of xmm2's first dword.
	; -----------------------------------------------------------------
	vmovdqa64	zmm29,ZMMWORD[SHUF_MASK]
	vmovdqa64	zmm27,ZMMWORD[ddq_addbe_4444]
	vmovdqa64	zmm28,ZMMWORD[ddq_addbe_1234]






	vmovd	r15d,xmm2
	and	r15d,255

	vshufi64x2	zmm2,zmm2,zmm2,0
	vpshufb	zmm2,zmm2,zmm29



	; If r15b >= 240, adding 16 would wrap the tracked byte, so take the
	; path that shuffles back, adds, and shuffles again.
	cmp	r15b,240
	jae	NEAR $L$_next_16_overflow_174
	vpaddd	zmm7,zmm2,zmm28
	vpaddd	zmm10,zmm7,zmm27
	vpaddd	zmm11,zmm10,zmm27
	vpaddd	zmm12,zmm11,zmm27
	jmp	NEAR $L$_next_16_ok_174
$L$_next_16_overflow_174:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm12,ZMMWORD[ddq_add_4444]
	vpaddd	zmm7,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm10,zmm7,zmm12
	vpaddd	zmm11,zmm10,zmm12
	vpaddd	zmm12,zmm11,zmm12
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
$L$_next_16_ok_174:
	; zmm2 = last counter (lane 3 of zmm12) broadcast to all lanes.
	vshufi64x2	zmm2,zmm12,zmm12,255
	add	r15b,16

	; Load 256 source bytes at r9+r11.
	vmovdqu8	zmm0,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm3,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm4,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm5,ZMMWORD[192+r11*1+r9]


	; AES: initial key XOR, eleven vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm6,ZMMWORD[rcx]
	vpxorq	zmm7,zmm7,zmm6
	vpxorq	zmm10,zmm10,zmm6
	vpxorq	zmm11,zmm11,zmm6
	vpxorq	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[16+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[32+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[48+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[64+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[80+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[96+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[112+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[128+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[144+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[160+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[176+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[192+rcx]
	vaesenclast	zmm7,zmm7,zmm6
	vaesenclast	zmm10,zmm10,zmm6
	vaesenclast	zmm11,zmm11,zmm6
	vaesenclast	zmm12,zmm12,zmm6


	; XOR the cipher output with the source data.
	vpxorq	zmm7,zmm7,zmm0
	vpxorq	zmm10,zmm10,zmm3
	vpxorq	zmm11,zmm11,zmm4
	vpxorq	zmm12,zmm12,zmm5


	; Store 256 output bytes at [rbp+120]+r11.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm7
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm10
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm11
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm12

	; Byte-shuffle the output and keep it on the stack for deferred hashing.
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm7
	vmovdqa64	ZMMWORD[832+rsp],zmm10
	vmovdqa64	ZMMWORD[896+rsp],zmm11
	vmovdqa64	ZMMWORD[960+rsp],zmm12
	; r14 != 0 means the stack copy of the table is already in place.
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_175

	; Copy four 64-byte table entries from the context (rdx+96 .. rdx+351)
	; to the stack frame (rsp+512 .. rsp+767).
	vmovdqu64	zmm0,ZMMWORD[288+rdx]
	vmovdqu64	ZMMWORD[704+rsp],zmm0

	vmovdqu64	zmm3,ZMMWORD[224+rdx]
	vmovdqu64	ZMMWORD[640+rsp],zmm3


	; zmm3 = lane 0 of the rdx+224 entry broadcast to all four lanes.
	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[160+rdx]
	vmovdqu64	ZMMWORD[576+rsp],zmm4

	vmovdqu64	zmm5,ZMMWORD[96+rdx]
	vmovdqu64	ZMMWORD[512+rsp],zmm5
; ---------------------------------------------------------------------
; Second batch of 16 blocks, taken only when at least 512 bytes remain
; in r13 (shorter inputs branch to $L$_message_below_32_blocks_172).
; Same counter / AES / XOR / store sequence as the first batch, but at
; source and destination offset +256; the byte-shuffled output is
; parked at rsp+1024 .. rsp+1279.
; ---------------------------------------------------------------------
$L$_skip_hkeys_precomputation_175:
	cmp	r13,512
	jb	NEAR $L$_message_below_32_blocks_172



	; If r15b >= 240, adding 16 would wrap the tracked byte, so take the
	; path that shuffles back, adds, and shuffles again.
	cmp	r15b,240
	jae	NEAR $L$_next_16_overflow_176
	vpaddd	zmm7,zmm2,zmm28
	vpaddd	zmm10,zmm7,zmm27
	vpaddd	zmm11,zmm10,zmm27
	vpaddd	zmm12,zmm11,zmm27
	jmp	NEAR $L$_next_16_ok_176
$L$_next_16_overflow_176:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm12,ZMMWORD[ddq_add_4444]
	vpaddd	zmm7,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm10,zmm7,zmm12
	vpaddd	zmm11,zmm10,zmm12
	vpaddd	zmm12,zmm11,zmm12
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
$L$_next_16_ok_176:
	; zmm2 = last counter (lane 3 of zmm12) broadcast to all lanes.
	vshufi64x2	zmm2,zmm12,zmm12,255
	add	r15b,16

	; Load the next 256 source bytes at r9+r11+256.
	vmovdqu8	zmm0,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm3,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm4,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm5,ZMMWORD[448+r11*1+r9]


	; AES: initial key XOR, eleven vaesenc rounds, then vaesenclast.
	vbroadcastf64x2	zmm6,ZMMWORD[rcx]
	vpxorq	zmm7,zmm7,zmm6
	vpxorq	zmm10,zmm10,zmm6
	vpxorq	zmm11,zmm11,zmm6
	vpxorq	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[16+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[32+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[48+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[64+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[80+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[96+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[112+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[128+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[144+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[160+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[176+rcx]
	vaesenc	zmm7,zmm7,zmm6
	vaesenc	zmm10,zmm10,zmm6
	vaesenc	zmm11,zmm11,zmm6
	vaesenc	zmm12,zmm12,zmm6
	vbroadcastf64x2	zmm6,ZMMWORD[192+rcx]
	vaesenclast	zmm7,zmm7,zmm6
	vaesenclast	zmm10,zmm10,zmm6
	vaesenclast	zmm11,zmm11,zmm6
	vaesenclast	zmm12,zmm12,zmm6


	; XOR the cipher output with the source data.
	vpxorq	zmm7,zmm7,zmm0
	vpxorq	zmm10,zmm10,zmm3
	vpxorq	zmm11,zmm11,zmm4
	vpxorq	zmm12,zmm12,zmm5


	; Store 256 output bytes at [rbp+120]+r11+256.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm7
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm10
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm11
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm12

	; Byte-shuffle the output and keep it on the stack for deferred hashing.
	vpshufb	zmm7,zmm7,zmm29
	vpshufb	zmm10,zmm10,zmm29
	vpshufb	zmm11,zmm11,zmm29
	vpshufb	zmm12,zmm12,zmm29
	vmovdqa64	ZMMWORD[1024+rsp],zmm7
	vmovdqa64	ZMMWORD[1088+rsp],zmm10
	vmovdqa64	ZMMWORD[1152+rsp],zmm11
	vmovdqa64	ZMMWORD[1216+rsp],zmm12
	; One-time fill of stack slots 448 down to 0, skipped when r14 is non-zero
	; (r14 is set to 1 right after this section).
	; Eight multiply stages follow. Each one multiplies a 512-bit vector by
	; zmm3 using four carry-less partial products, folds them, reduces with the
	; POLY2 constant and stores the product 64 bytes lower in the frame.
	; Stages alternate between two chains: zmm4 (seeded from [576+rsp]) and
	; zmm5 (seeded from [512+rsp]); each chain feeds its previous product into
	; its next stage.
	; NOTE(review): presumably slots 0..704 hold successive hash-key powers and
	; this extends the table - confirm against the code that fills 512..704.
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_177
	vmovdqu64	zmm3,ZMMWORD[640+rsp]


	; imm 0x00: replicate 128-bit lane 0 of [640+rsp] into all four lanes.
	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[576+rsp]
	vmovdqu64	zmm5,ZMMWORD[512+rsp]

	; Stage 1: zmm4 * zmm3 -> [448+rsp]
	; zmm6 = high*high, zmm7 = low*low, zmm10/zmm4 = the two cross products.
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	; Split the middle term across the high (zmm6) and low (zmm4) halves.
	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	; Reduction using POLY2.
	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	; 0x96 = three-way XOR: zmm4 ^= zmm6 ^ zmm7.
	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm4

	; Stage 2: zmm5 * zmm3 -> [384+rsp]
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm5

	; Stage 3: zmm4 (stage-1 product) * zmm3 -> [320+rsp]
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm4

	; Stage 4: zmm5 (stage-2 product) * zmm3 -> [256+rsp]
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm5

	; Stage 5: zmm4 (stage-3 product) * zmm3 -> [192+rsp]
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[192+rsp],zmm4

	; Stage 6: zmm5 (stage-4 product) * zmm3 -> [128+rsp]
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[128+rsp],zmm5

	; Stage 7: zmm4 (stage-5 product) * zmm3 -> [64+rsp]
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[64+rsp],zmm4

	; Stage 8: zmm5 (stage-6 product) * zmm3 -> [rsp]
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[rsp],zmm5
; Mark the stack table as filled (r14 = 1 makes the "test r14,r14" guard skip
; the precomputation next time), account for 512 bytes (r11 = data offset goes
; up, r13 = remaining byte count goes down), and enter the 768-byte loop only
; if at least 768 bytes remain.
$L$_skip_hkeys_precomputation_177:
	mov	r14,1
	add	r11,512
	sub	r13,512

	cmp	r13,768
	jb	NEAR $L$_no_more_big_nblocks_172
; Top of the main loop. One iteration consumes 768 bytes (the loop tail adds
; 768 to r11 and subtracts 768 from r13) as three groups of 16 blocks.
; This is group 1 of 3: it encrypts the data at offsets 0..255 while starting a
; new hash accumulation from the saved vectors in stack slots 768..960
; multiplied by the values in slots 0..192. AES rounds and carry-less
; multiplies are interleaved.
$L$_encrypt_big_nblocks_172:
	; Fast path: derive four counter vectors from zmm2 by adding zmm28, then
	; zmm27 three times. The shuffled path below is used when r15b >= 240.
	; NOTE(review): presumably zmm28/zmm27 hold the same increments as
	; ddq_add_1234/ddq_add_4444 in the shuffled-back byte order - confirm.
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_178
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_178
$L$_16_blocks_overflow_178:
	; Shuffle with zmm29, add the ddq constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_178:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; The running hash in zmm14 is folded into the first saved data vector.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rsp]




	; zmm2 = last counter block of this group replicated to all lanes.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	; AES round 0 (key XOR); zmm30/zmm31 alternate as round-key registers.
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	; Partial products for (zmm14 ^ slot 768) * slot 0.
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	; Partial products for slot 832 * slot 64.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	; Partial products for slot 896 * slot 128.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	; Three-way XOR (0x96) of matching partial products.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	; Input for this group (zmm17..zmm21 are free again after the XORs above).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	; Partial products for slot 960 * slot 192.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Start the unreduced accumulators for this iteration:
	; zmm26 = middle terms, zmm24 = high terms, zmm25 = low terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Last round uses the key at offset 192.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	; Keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	; Write the output and keep a zmm29-shuffled copy in slots 1280..1472.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	; Group 2 of 3 in the 768-byte loop: encrypts the data at offsets 256..511
	; while multiplying the saved vectors in stack slots 1024..1216 by the
	; values in slots 256..448 and XOR-accumulating the products into
	; zmm24/zmm25/zmm26.
	; Counter generation: fast path adds zmm28/zmm27; the shuffled path is
	; taken when r15b >= 240.
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_179
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_179
$L$_16_blocks_overflow_179:
	; Shuffle with zmm29, add the ddq constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_179:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[256+rsp]




	; zmm2 = last counter block of this group replicated to all lanes.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[320+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]







	; AES round 0 (key XOR).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	; Partial products for slot 1024 * slot 256.
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[384+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	; Partial products for slot 1088 * slot 320.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[448+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	; Partial products for slot 1152 * slot 384.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	; Three-way XOR (0x96) of matching partial products.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	; Input for this group.
	vmovdqu8	zmm17,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[448+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	; Partial products for slot 1216 * slot 448.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Accumulate (rather than overwrite) into zmm26 / zmm24 / zmm25.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vpternlogq	zmm24,zmm6,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Last round uses the key at offset 192.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	; Keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	; Write the output and keep a zmm29-shuffled copy in slots 768..960
	; (these are the slots group 1 of the next iteration hashes).
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm0
	vmovdqa64	ZMMWORD[832+rsp],zmm3
	vmovdqa64	ZMMWORD[896+rsp],zmm4
	vmovdqa64	ZMMWORD[960+rsp],zmm5
	; Group 3 of 3 in the 768-byte loop: encrypts the data at offsets 512..767
	; while multiplying the saved vectors in stack slots 1280..1472 by the
	; values in slots 512..704, then folds everything accumulated in
	; zmm24/zmm25/zmm26 into one 128-bit value, reduces it with POLY2 and
	; leaves the result in zmm14 for the next iteration.
	; Counter generation: fast path adds zmm28/zmm27; the shuffled path is
	; taken when r15b >= 240.
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_180
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_180
$L$_16_blocks_overflow_180:
	; Shuffle with zmm29, add the ddq constants, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_180:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]




	; zmm2 = last counter block of this group replicated to all lanes.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]







	; AES round 0 (key XOR).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	; Partial products for slot 1280 * slot 512.
	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	; Partial products for slot 1344 * slot 576.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	; Partial products for slot 1408 * slot 640.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	; Three-way XOR (0x96) of matching partial products.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	; Input for this group.
	vmovdqu8	zmm17,ZMMWORD[512+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[576+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[640+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[704+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	; Partial products for slot 1472 * slot 704.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]


	; Combine this group's middle terms with the accumulated ones in zmm26.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96

	; Split the middle term: upper half goes to the high sum, lower half to
	; the low sum.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; zmm6 = total high part, zmm7 = total low part (including zmm24/zmm25).
	vpternlogq	zmm6,zmm12,zmm15,0x96
	vpxorq	zmm6,zmm6,zmm24
	vpternlogq	zmm7,zmm13,zmm10,0x96
	vpxorq	zmm7,zmm7,zmm25
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	; Horizontal XOR of the four 128-bit lanes down to xmm6 / xmm7.
	vextracti64x4	ymm12,zmm6,1
	vpxorq	ymm6,ymm6,ymm12
	vextracti32x4	xmm12,ymm6,1
	vpxorq	xmm6,xmm6,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with POLY2, first step.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Reduction with POLY2, second step; xmm6 receives the reduced value.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm6,xmm15,xmm12,0x96
	; Last round uses the key at offset 192.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	; Keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	; Write the output and keep a zmm29-shuffled copy in slots 1024..1216.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[512+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[576+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[640+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[704+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1024+rsp],zmm0
	vmovdqa64	ZMMWORD[1088+rsp],zmm3
	vmovdqa64	ZMMWORD[1152+rsp],zmm4
	vmovdqa64	ZMMWORD[1216+rsp],zmm5
	; Carry the reduced hash value into the next iteration.
	vmovdqa64	zmm14,zmm6

	; Advance by 768 bytes and repeat while at least 768 bytes remain.
	add	r11,768
	sub	r13,768
	cmp	r13,768
	jae	NEAR $L$_encrypt_big_nblocks_172

; Fewer than 768 bytes remain (r13). Pick the tail handler by size: at least
; 512 bytes, at least 256 bytes, or fall through to the handler for under
; 256 bytes.
$L$_no_more_big_nblocks_172:

	cmp	r13,512
	jae	NEAR $L$_encrypt_32_blocks_172

	cmp	r13,256
	jae	NEAR $L$_encrypt_16_blocks_172
; Tail with fewer than 256 bytes left. First multiply the four saved vectors in
; stack slots 768..960 (with zmm14 folded into the first) by table entries
; starting at rsp+rbx, where rbx = 256 - (r13 rounded down to a multiple of
; 16); products are left unreduced in zmm24 (high), zmm25 (low) and zmm26
; (middle). Then branch to the handler for the number of remaining 16-byte
; blocks.
$L$_encrypt_0_blocks_ghash_32_172:
	mov	r10d,r13d
	and	r10d,~15
	mov	ebx,256
	sub	ebx,r10d
	vmovdqa64	zmm13,ZMMWORD[768+rsp]
	vpxorq	zmm13,zmm13,zmm14
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[832+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; Start the accumulators from the first two products.
	vpxorq	zmm26,zmm4,zmm10
	vpxorq	zmm24,zmm0,zmm6
	vpxorq	zmm25,zmm3,zmm7
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[896+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[960+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	; Fold in the last two products (0x96 = three-way XOR).
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; rbx now indexes the table entries used by the per-count handlers.
	add	ebx,256
	; r10d = ceil(r13 / 16) = number of remaining blocks, counting a partial
	; one. The shr sets ZF when that count is zero.
	mov	r10d,r13d
	add	r10d,15
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_181

	; Compare tree over r10d in 1..16.
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_181
	jb	NEAR $L$_last_num_blocks_is_7_1_181


	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_181
	jb	NEAR $L$_last_num_blocks_is_11_9_181


	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_181
	ja	NEAR $L$_last_num_blocks_is_16_181
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_181
	jmp	NEAR $L$_last_num_blocks_is_13_181

$L$_last_num_blocks_is_11_9_181:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_181
	ja	NEAR $L$_last_num_blocks_is_11_181
	jmp	NEAR $L$_last_num_blocks_is_9_181

$L$_last_num_blocks_is_7_1_181:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_181
	jb	NEAR $L$_last_num_blocks_is_3_1_181

	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_181
	je	NEAR $L$_last_num_blocks_is_6_181
	jmp	NEAR $L$_last_num_blocks_is_5_181

$L$_last_num_blocks_is_3_1_181:

	; Count 1 falls through to the handler that follows this dispatch code.
	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_181
	je	NEAR $L$_last_num_blocks_is_2_181
; Tail handler for exactly one remaining block, possibly partial. It encrypts
; one counter block in xmm0, processes r13 bytes under byte mask k1, and at the
; same time hashes the saved vectors in stack slots 1024..1216 against the
; table entries at rsp+rbx. The reduced hash ends up in xmm14.
$L$_last_num_blocks_is_1_181:
	; k1 = 64-bit byte mask read from byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Counter generation: plain add of zmm28 unless r15d >= 255.
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_182
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_182

$L$_16_blocks_overflow_182:
	; Shuffle with zmm29, add ddq_add_1234, shuffle the used lane back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
$L$_16_blocks_ok_182:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = the counter block just used (lane 0) replicated to all lanes.
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; AES round 0 (key XOR) on the single block.
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Partial products for slot 1024 * table[rbx].
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products for slot 1088 * table[rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products for slot 1152 * table[rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the remaining input bytes.
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products for slot 1216 * table[rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Accumulate into zmm24 (high), zmm25 (low), zmm26 (middle).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vaesenclast	xmm0,xmm0,xmm30
	; Keystream XOR input. xmm11 keeps the full unmasked 16-byte result.
	vpxorq	xmm0,xmm0,xmm17
	vextracti32x4	xmm11,zmm0,0
	mov	r10,QWORD[120+rbp]
	; Masked store writes only the bytes selected by k1.
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes beyond the mask before hashing the output.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	vextracti32x4	xmm7,zmm17,0


	; NOTE(review): signed compare on a byte count; r13 is at most 16 here
	; given the dispatch, so signedness should not matter - confirm.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_183





	; Full 16-byte block: store 0 at [r8], multiply the shuffled output block
	; by the 128-bit value at [336+rdx] and fold it into the accumulators.
	; NOTE(review): presumably [r8] is the partial-block length field and
	; [336+rdx] the hash key H^1 in the GCM context - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Merge the middle terms into the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_183
$L$_small_initial_partial_block_183:








	; Partial block: record the byte count at [r8] and save the unmasked
	; result block at [16+rdx]. Only the previously accumulated products are
	; reduced here; the partial block itself is not multiplied.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm0,XMMWORD[POLY2]


	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












	; XOR the zero-padded, shuffled partial block into the hash value.
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_183
$L$_small_initial_compute_done_183:
$L$_after_reduction_183:
	jmp	NEAR $L$_last_blocks_done_181
; Tail handler for two remaining blocks, the second possibly partial. It
; encrypts two counter blocks in ymm0, processes r13 bytes under byte mask k1,
; and at the same time hashes the saved vectors in stack slots 1024..1216
; against the table entries at rsp+rbx. The reduced hash ends up in xmm14.
$L$_last_num_blocks_is_2_181:
	; k1 = 64-bit byte mask read from byte64_len_to_mask_table[r13].
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; Counter generation: plain add of zmm28 unless r15d >= 254.
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_184
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_184

$L$_16_blocks_overflow_184:
	; Shuffle with zmm29, add ddq_add_1234, shuffle the used lanes back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_184:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = the last counter block used (lane 1) replicated to all lanes.
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; AES round 0 (key XOR) on the two blocks.
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Partial products for slot 1024 * table[rbx].
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Partial products for slot 1088 * table[rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Partial products for slot 1152 * table[rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Masked, zero-filling load of the remaining input bytes.
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Partial products for slot 1216 * table[rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Accumulate into zmm24 (high), zmm25 (low), zmm26 (middle).
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vaesenclast	ymm0,ymm0,ymm30
	; Keystream XOR input. xmm11 keeps the unmasked second result block.
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm11,zmm0,1
	mov	r10,QWORD[120+rbp]
	; Masked store writes only the bytes selected by k1.
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; Zero the bytes beyond the mask before hashing the output.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	; xmm7 = shuffled, zero-padded last (second) block.
	vextracti32x4	xmm7,zmm17,1
	; r13 = bytes belonging to the last block.
	sub	r13,16 * (2 - 1)


	; NOTE(review): signed compare on a byte count; r13 is at most 16 here
	; given the dispatch, so signedness should not matter - confirm.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_185





	; Last block is full: store 0 at [r8] and multiply both shuffled output
	; blocks by the 256-bit value at [320+rdx].
	; NOTE(review): presumably [320+rdx] holds two hash-key powers in the GCM
	; context - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Merge the middle terms into the high (zmm0) and low (zmm3) halves, then
	; XOR the four 128-bit lanes together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2; result in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_185
$L$_small_initial_partial_block_185:








	; Last block is partial: record its byte count at [r8], save the unmasked
	; result block at [16+rdx], and multiply only the first (full) output
	; block, held in xmm17, by the 128-bit value at [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_185:

	; The zero-padded last block (xmm7) is XORed into the hash only when r13
	; is non-zero, i.e. when the partial-block path was taken.
	or	r13,r13
	je	NEAR $L$_after_reduction_185
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_185:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 3 blocks remain (r13 is used as the remaining byte count).
; Builds 3 counter blocks in zmm0, runs them through the 12 AES rounds
; keyed from [rcx]..[rcx+192], XORs them with the masked input at
; [r9+r11] and stores the result through the pointer read from
; [rbp+120].  Interleaved with the AES rounds, the 4 x 64 bytes at
; [rsp+rbx] are carry-less multiplied by the 4 x 64 bytes at
; [rsp+1024] and XOR-accumulated into zmm24/zmm25/zmm26.  The new
; output blocks are then multiplied in and the sum is reduced with
; POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_3_181:
	; k1 = byte64_len_to_mask_table[r13] (8-byte entries): byte mask
	; used for the masked load/store of the final vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d < 253: add zmm28 to zmm2 directly.  Otherwise shuffle zmm2
	; with zmm29, add ddq_add_1234 and shuffle back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_186
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_186

$L$_16_blocks_overflow_186:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_186:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 2 of zmm0 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zero-filling load of the input bytes
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm0,2
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm17,2
	; r13 = byte count of the last block
	sub	r13,16 * (3 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_187





	; Last block is complete: clear the qword at [r8] and multiply all
	; 3 blocks by the three 16-byte values at rdx+304/320/336.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	; add the running accumulators
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_187
$L$_small_initial_partial_block_187:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only the first
	; 2 blocks (ymm width) by the values at rdx+320/336.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	; add the running accumulators
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_187:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_187
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_187:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 4 blocks remain (r13 is used as the remaining byte count).
; Builds 4 counter blocks in zmm0, runs them through the 12 AES rounds
; keyed from [rcx]..[rcx+192], XORs them with the masked input at
; [r9+r11] and stores the result through the pointer read from
; [rbp+120].  Interleaved with the AES rounds, the 4 x 64 bytes at
; [rsp+rbx] are carry-less multiplied by the 4 x 64 bytes at
; [rsp+1024] and XOR-accumulated into zmm24/zmm25/zmm26.  The new
; output blocks are then multiplied in and the sum is reduced with
; POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_4_181:
	; k1 = byte64_len_to_mask_table[r13] (8-byte entries): byte mask
	; used for the masked load/store of the final vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d < 252: add zmm28 to zmm2 directly.  Otherwise shuffle zmm2
	; with zmm29, add ddq_add_1234 and shuffle back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_188
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_188

$L$_16_blocks_overflow_188:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_188:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 3 of zmm0 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked, zero-filling load of the input bytes
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm0,3
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm17,3
	; r13 = byte count of the last block
	sub	r13,16 * (4 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_189





	; Last block is complete: clear the qword at [r8] and multiply all
	; 4 blocks by the four 16-byte values at rdx+288..351.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; add the running accumulators
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_189
$L$_small_initial_partial_block_189:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only the first
	; 3 blocks by the values at rdx+304/320/336 (lane 3 of zmm1 is
	; zeroed by the ymm load).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	; add the running accumulators
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_189:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_189
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_189:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 5 blocks remain (r13 is used as the remaining byte count).
; Counter blocks 1-4 live in zmm0 and block 5 in xmm3.  Both are run
; through the 12 AES rounds keyed from [rcx]..[rcx+192], XORed with the
; input at [r9+r11] (first 64 bytes unmasked, remainder masked by k1)
; and stored through the pointer read from [rbp+120].  Interleaved
; with the AES rounds, the 4 x 64 bytes at [rsp+rbx] are carry-less
; multiplied by the 4 x 64 bytes at [rsp+1024] and XOR-accumulated
; into zmm24/zmm25/zmm26.  The new output blocks are then multiplied
; in and the sum is reduced with POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_5_181:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask for the
	; second vector; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d < 251: zmm0 = zmm2 + zmm28, xmm3 = xmm0 + xmm27.
	; Otherwise shuffle zmm2 with zmm29, add ddq_add_1234 /
	; ddq_add_4444 and shuffle the results back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_190
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_190

$L$_16_blocks_overflow_190:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_190:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 0 of zmm3 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; input: first 64 bytes in full, remainder masked and zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm3,0
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm19,0
	; r13 = byte count of the last block
	sub	r13,16 * (5 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_191





	; Last block is complete: clear the qword at [r8] and multiply
	; blocks 1-4 by the values at rdx+272..335 and block 5 by the
	; value at rdx+336.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_191
$L$_small_initial_partial_block_191:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only blocks 1-4 by
	; the values at rdx+288..351.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; add the running accumulators
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_191:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_191
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_191:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 6 blocks remain (r13 is used as the remaining byte count).
; Counter blocks 1-4 live in zmm0 and blocks 5-6 in ymm3.  Both are run
; through the 12 AES rounds keyed from [rcx]..[rcx+192], XORed with the
; input at [r9+r11] (first 64 bytes unmasked, remainder masked by k1)
; and stored through the pointer read from [rbp+120].  Interleaved
; with the AES rounds, the 4 x 64 bytes at [rsp+rbx] are carry-less
; multiplied by the 4 x 64 bytes at [rsp+1024] and XOR-accumulated
; into zmm24/zmm25/zmm26.  The new output blocks are then multiplied
; in and the sum is reduced with POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_6_181:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask for the
	; second vector; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d < 250: zmm0 = zmm2 + zmm28, ymm3 = ymm0 + ymm27.
	; Otherwise shuffle zmm2 with zmm29, add ddq_add_1234 /
	; ddq_add_4444 and shuffle the results back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_192
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_192

$L$_16_blocks_overflow_192:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_192:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 1 of zmm3 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; input: first 64 bytes in full, remainder masked and zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm3,1
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm19,1
	; r13 = byte count of the last block
	sub	r13,16 * (6 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_193





	; Last block is complete: clear the qword at [r8] and multiply
	; blocks 1-4 by the values at rdx+256..319 and blocks 5-6 by the
	; values at rdx+320/336.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_193
$L$_small_initial_partial_block_193:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only blocks 1-5:
	; blocks 1-4 by the values at rdx+272..335, block 5 by rdx+336.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_193:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_193
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_193:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 7 blocks remain (r13 is used as the remaining byte count).
; Counter blocks 1-4 live in zmm0 and blocks 5-7 in zmm3.  Both are run
; through the 12 AES rounds keyed from [rcx]..[rcx+192], XORed with the
; input at [r9+r11] (first 64 bytes unmasked, remainder masked by k1)
; and stored through the pointer read from [rbp+120].  Interleaved
; with the AES rounds, the 4 x 64 bytes at [rsp+rbx] are carry-less
; multiplied by the 4 x 64 bytes at [rsp+1024] and XOR-accumulated
; into zmm24/zmm25/zmm26.  The new output blocks are then multiplied
; in and the sum is reduced with POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_7_181:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask for the
	; second vector; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d < 249: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27.
	; Otherwise shuffle zmm2 with zmm29, add ddq_add_1234 /
	; ddq_add_4444 and shuffle the results back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_194
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_194

$L$_16_blocks_overflow_194:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_194:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 2 of zmm3 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; input: first 64 bytes in full, remainder masked and zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm3,2
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm19,2
	; r13 = byte count of the last block
	sub	r13,16 * (7 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_195





	; Last block is complete: clear the qword at [r8] and multiply
	; blocks 1-4 by the values at rdx+240..303 and blocks 5-7 by the
	; values at rdx+304/320/336.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_195
$L$_small_initial_partial_block_195:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only blocks 1-6:
	; blocks 1-4 by the values at rdx+256..319, blocks 5-6 by the
	; values at rdx+320/336.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_195:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_195
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_195:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail case: 8 blocks remain (r13 is used as the remaining byte count).
; Counter blocks 1-4 live in zmm0 and blocks 5-8 in zmm3.  Both are run
; through the 12 AES rounds keyed from [rcx]..[rcx+192], XORed with the
; input at [r9+r11] (first 64 bytes unmasked, remainder masked by k1)
; and stored through the pointer read from [rbp+120].  Interleaved
; with the AES rounds, the 4 x 64 bytes at [rsp+rbx] are carry-less
; multiplied by the 4 x 64 bytes at [rsp+1024] and XOR-accumulated
; into zmm24/zmm25/zmm26.  The new output blocks are then multiplied
; in and the sum is reduced with POLY2 into xmm14.
; NOTE(review): register roles (r15d = low counter byte, zmm29 = byte
; shuffle mask, rdx = context holding hash-key powers) are inferred
; from usage in this block - confirm against the routine prologue.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_8_181:
	; k1 = byte64_len_to_mask_table[r13 - 64]: byte mask for the
	; second vector; the first 64 bytes are handled unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d < 248: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27.
	; Otherwise shuffle zmm2 with zmm29, add ddq_add_1234 /
	; ddq_add_4444 and shuffle the results back.
	; NOTE(review): presumably guards against a carry out of the low
	; counter byte - confirm.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_196
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_196

$L$_16_blocks_overflow_196:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_196:




	; zmm30/zmm31 alternate as the broadcast round keys.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 <- 128-bit lane 3 of zmm3 (last counter block used here),
	; replicated to all four lanes.
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; round 0: XOR with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; 1st 64-byte pair: 0x11 = high*high, 0x00 = low*low,
	; 0x01/0x10 = the two cross products.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; 2nd 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; 3rd 64-byte pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; 0x96 = three-input XOR: merge the products of pairs 1-3
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; input: first 64 bytes in full, remainder masked and zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; 4th 64-byte pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; XOR-accumulate: zmm24 += high, zmm25 += low, zmm26 += middle
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; keystream XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = last output block before masking (written to [rdx+16]
	; on the partial-block path below)
	vextracti32x4	xmm11,zmm3,3
	; r10 = output base pointer read from the frame slot at [rbp+120]
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; zero the bytes outside the mask, then byte-shuffle the output
	; with zmm29 for the multiply below
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	; xmm7 = shuffled last block
	vextracti32x4	xmm7,zmm19,3
	; r13 = byte count of the last block
	sub	r13,16 * (8 - 1)


	; fewer than 16 bytes in the last block -> partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_197





	; Last block is complete: clear the qword at [r8] and multiply
	; blocks 1-4 by the values at rdx+224..287 and blocks 5-8 by the
	; values at rdx+288..351.
	; NOTE(review): presumably [r8] is the pending partial-block
	; length and the rdx values are hash-key powers - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; merge the two product sets term by term
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; add the running accumulators
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_197
$L$_small_initial_partial_block_197:








	; Last block is partial: store its byte count to [r8], save the
	; last output block at [rdx+16], and multiply only blocks 1-7:
	; blocks 1-4 by the values at rdx+240..303, blocks 5-7 by the
	; values at rdx+304/320/336.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; combine both product sets with the running accumulators
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the middle term across the high (zmm0) and low (zmm3) sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes together (512 -> 256 -> 128)
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0:xmm3 using the POLY2 constant; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_197:

	; If r13 != 0 (a partial block was recorded), XOR the shuffled
	; partial block xmm7 into xmm14; it was not multiplied above.
	or	r13,r13
	je	NEAR $L$_after_reduction_197
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_197:
	jmp	NEAR $L$_last_blocks_done_181
; ----------------------------------------------------------------------
; Tail case: 9 counter blocks (zmm0 + zmm3 + one lane in xmm4).
; The 9th block may be shorter than 16 bytes; k1 masks its load and store.
; The AES part uses the round keys at rcx+0 .. rcx+192 (same 13-key sequence
; as $L$aes_192_0 at the top of the file) and is interleaved with carry-less
; multiplies whose partial products are XOR-accumulated into zmm24/25/26.
; NOTE(review): register roles below are inferred from their use in this
; block only; confirm against the enclosing function's prologue:
;   r13 = bytes left, r9+r11 = source, [rbp+120]+r11 = destination,
;   rdx = context with multiplier table, r8 = where the leftover length goes,
;   zmm2 = current counter, zmm27/zmm28 = counter increments,
;   zmm29 = byte-shuffle mask, xmm14 = resulting hash value.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_9_181:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 247 (= 256 - 9) selects the path that byte-shuffles the counter
	; around the additions. NOTE(review): presumably r15d is the counter's
	; low byte and this avoids a carry out of it -- confirm.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_198
	; Fast path: derive the three counter vectors directly from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_198

$L$_16_blocks_overflow_198:
	; Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_198:




	; zmm30/zmm31 alternate as broadcast round-key registers from here on.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Lane 0 of zmm4 is the last counter block used here; replicate it into
	; all four lanes of zmm2 as the new current counter.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply [rsp+rbx] by [rsp+1024]: high, low and the two cross products.
	; NOTE(review): presumably buffered blocks times hash-key powers saved on
	; the stack by earlier code -- confirm.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply [rsp+rbx+64] by [rsp+1088].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply [rsp+rbx+128] by [rsp+1152].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm8 0x96 is a three-way XOR: merge the first three sets of products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load 128 full input bytes plus the k1-masked, zero-filled 9th block.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply [rsp+rbx+192] by [rsp+1216].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators (they are read as well as
	; written): zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds (keys at rcx+128 .. rcx+176), final round rcx+192.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	; XOR the key stream with the loaded input to form the output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
	; xmm11 = last output lane before masking; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm4,0
	; Destination base comes from the frame slot at rbp+120.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be multiplied below.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	; xmm7 = shuffled last block, consumed after the reduction when r13 != 0.
	vextracti32x4	xmm7,zmm20,0
	; r13 -= bytes covered by the first 8 blocks.
	sub	r13,16 * (9 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_199





	; Last block is complete: r13 becomes r13 - 16 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all 9 blocks by the values at rdx+208, rdx+272 and rdx+336.
	; NOTE(review): presumably hash-key powers stored in descending order up
	; to rdx+336 -- confirm against the context layout.
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; XMM-width multiplies: the upper lanes of zmm0/3/4/5 are zeroed by the
	; narrower write, so the ZMM-wide XORs that follow remain correct.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 using the POLY2 constant; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_199
$L$_small_initial_partial_block_199:








	; Last block is short: record its length in [r8] and save xmm11 at
	; [rdx+16]. Only the 8 complete blocks are multiplied here (by the values
	; at rdx+224 and rdx+288); the short block is added afterwards via xmm7.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Add the accumulators zmm26 (cross), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Same lane fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_199:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	; NOTE(review): presumably its multiplication is deferred to later
	; code -- confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_199
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_199:
	jmp	NEAR $L$_last_blocks_done_181
; ----------------------------------------------------------------------
; Tail case: 10 counter blocks (zmm0 + zmm3 + two lanes in ymm4).
; The 10th block may be shorter than 16 bytes; k1 masks the third vector's
; load and store.
; The AES part uses the round keys at rcx+0 .. rcx+192 (same 13-key sequence
; as $L$aes_192_0 at the top of the file) and is interleaved with carry-less
; multiplies whose partial products are XOR-accumulated into zmm24/25/26.
; NOTE(review): register roles below are inferred from their use in this
; block only; confirm against the enclosing function's prologue:
;   r13 = bytes left, r9+r11 = source, [rbp+120]+r11 = destination,
;   rdx = context with multiplier table, r8 = where the leftover length goes,
;   zmm2 = current counter, zmm27/zmm28 = counter increments,
;   zmm29 = byte-shuffle mask, xmm14 = resulting hash value.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_10_181:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 246 (= 256 - 10) selects the path that byte-shuffles the
	; counter around the additions. NOTE(review): presumably r15d is the
	; counter's low byte and this avoids a carry out of it -- confirm.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_200
	; Fast path: derive the three counter vectors directly from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_200

$L$_16_blocks_overflow_200:
	; Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_200:




	; zmm30/zmm31 alternate as broadcast round-key registers from here on.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Lane 1 of zmm4 is the last counter block used here; replicate it into
	; all four lanes of zmm2 as the new current counter.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply [rsp+rbx] by [rsp+1024]: high, low and the two cross products.
	; NOTE(review): presumably buffered blocks times hash-key powers saved on
	; the stack by earlier code -- confirm.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply [rsp+rbx+64] by [rsp+1088].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply [rsp+rbx+128] by [rsp+1152].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm8 0x96 is a three-way XOR: merge the first three sets of products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load 128 full input bytes plus the k1-masked, zero-filled remainder.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply [rsp+rbx+192] by [rsp+1216].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators (they are read as well as
	; written): zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds (keys at rcx+128 .. rcx+176), final round rcx+192.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
	; XOR the key stream with the loaded input to form the output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
	; xmm11 = last output lane before masking; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm4,1
	; Destination base comes from the frame slot at rbp+120.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be multiplied below.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	; xmm7 = shuffled last block, consumed after the reduction when r13 != 0.
	vextracti32x4	xmm7,zmm20,1
	; r13 -= bytes covered by the first 9 blocks.
	sub	r13,16 * (10 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_201





	; Last block is complete: r13 becomes r13 - 16 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all 10 blocks by the values at rdx+192, rdx+256 and rdx+320.
	; NOTE(review): presumably hash-key powers stored in descending order up
	; to rdx+336 -- confirm against the context layout.
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; YMM-width multiplies: the upper lanes of zmm0/3/4/5 are zeroed by the
	; narrower write, so the ZMM-wide XORs that follow remain correct.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 using the POLY2 constant; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_201
$L$_small_initial_partial_block_201:








	; Last block is short: record its length in [r8] and save xmm11 at
	; [rdx+16]. Only the 9 complete blocks are multiplied here (by the values
	; at rdx+208, rdx+272 and rdx+336); the short block is added afterwards
	; via xmm7.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; XMM-width multiplies cover only lane 0 of zmm20 (the 9th block).
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same lane fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_201:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	; NOTE(review): presumably its multiplication is deferred to later
	; code -- confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_201
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_201:
	jmp	NEAR $L$_last_blocks_done_181
; ----------------------------------------------------------------------
; Tail case: 11 counter blocks (zmm0 + zmm3 + three lanes of zmm4).
; The 11th block may be shorter than 16 bytes; k1 masks the third vector's
; load and store.
; The AES part uses the round keys at rcx+0 .. rcx+192 (same 13-key sequence
; as $L$aes_192_0 at the top of the file) and is interleaved with carry-less
; multiplies whose partial products are XOR-accumulated into zmm24/25/26.
; NOTE(review): register roles below are inferred from their use in this
; block only; confirm against the enclosing function's prologue:
;   r13 = bytes left, r9+r11 = source, [rbp+120]+r11 = destination,
;   rdx = context with multiplier table, r8 = where the leftover length goes,
;   zmm2 = current counter, zmm27/zmm28 = counter increments,
;   zmm29 = byte-shuffle mask, xmm14 = resulting hash value.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_11_181:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 245 (= 256 - 11) selects the path that byte-shuffles the
	; counter around the additions. NOTE(review): presumably r15d is the
	; counter's low byte and this avoids a carry out of it -- confirm.
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_202
	; Fast path: derive the three counter vectors directly from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_202

$L$_16_blocks_overflow_202:
	; Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_202:




	; zmm30/zmm31 alternate as broadcast round-key registers from here on.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Lane 2 of zmm4 is the last counter block used here; replicate it into
	; all four lanes of zmm2 as the new current counter.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply [rsp+rbx] by [rsp+1024]: high, low and the two cross products.
	; NOTE(review): presumably buffered blocks times hash-key powers saved on
	; the stack by earlier code -- confirm.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply [rsp+rbx+64] by [rsp+1088].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply [rsp+rbx+128] by [rsp+1152].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm8 0x96 is a three-way XOR: merge the first three sets of products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load 128 full input bytes plus the k1-masked, zero-filled remainder.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply [rsp+rbx+192] by [rsp+1216].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators (they are read as well as
	; written): zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds (keys at rcx+128 .. rcx+176), final round rcx+192.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the key stream with the loaded input to form the output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = last output lane before masking; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm4,2
	; Destination base comes from the frame slot at rbp+120.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be multiplied below.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = shuffled last block, consumed after the reduction when r13 != 0.
	vextracti32x4	xmm7,zmm20,2
	; r13 -= bytes covered by the first 10 blocks.
	sub	r13,16 * (11 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_203





	; Last block is complete: r13 becomes r13 - 16 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all 11 blocks by the values at rdx+176, rdx+240 and
	; rdx+304..rdx+351.
	; NOTE(review): presumably hash-key powers stored in descending order up
	; to rdx+336 -- confirm against the context layout.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Build a three-lane multiplier: the YMM load fills lanes 0-1 and zeroes
	; the rest, the insert fills lane 2, so lane 3 of zmm1 stays zero.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 using the POLY2 constant; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_203
$L$_small_initial_partial_block_203:








	; Last block is short: record its length in [r8] and save xmm11 at
	; [rdx+16]. Only the 10 complete blocks are multiplied here (by the
	; values at rdx+192, rdx+256 and rdx+320); the short block is added
	; afterwards via xmm7.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; YMM-width multiplies cover only lanes 0-1 of zmm20 (blocks 9 and 10).
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same lane fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_203:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	; NOTE(review): presumably its multiplication is deferred to later
	; code -- confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_203
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_203:
	jmp	NEAR $L$_last_blocks_done_181
; ----------------------------------------------------------------------
; Tail case: 12 counter blocks (zmm0 + zmm3 + all four lanes of zmm4).
; The 12th block may be shorter than 16 bytes; k1 masks the third vector's
; load and store.
; The AES part uses the round keys at rcx+0 .. rcx+192 (same 13-key sequence
; as $L$aes_192_0 at the top of the file) and is interleaved with carry-less
; multiplies whose partial products are XOR-accumulated into zmm24/25/26.
; NOTE(review): register roles below are inferred from their use in this
; block only; confirm against the enclosing function's prologue:
;   r13 = bytes left, r9+r11 = source, [rbp+120]+r11 = destination,
;   rdx = context with multiplier table, r8 = where the leftover length goes,
;   zmm2 = current counter, zmm27/zmm28 = counter increments,
;   zmm29 = byte-shuffle mask, xmm14 = resulting hash value.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_12_181:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 128]
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 244 (= 256 - 12) selects the path that byte-shuffles the
	; counter around the additions. NOTE(review): presumably r15d is the
	; counter's low byte and this avoids a carry out of it -- confirm.
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_204
	; Fast path: derive the three counter vectors directly from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_204

$L$_16_blocks_overflow_204:
	; Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_204:




	; zmm30/zmm31 alternate as broadcast round-key registers from here on.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Lane 3 of zmm4 is the last counter block used here; replicate it into
	; all four lanes of zmm2 as the new current counter.
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply [rsp+rbx] by [rsp+1024]: high, low and the two cross products.
	; NOTE(review): presumably buffered blocks times hash-key powers saved on
	; the stack by earlier code -- confirm.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply [rsp+rbx+64] by [rsp+1088].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply [rsp+rbx+128] by [rsp+1152].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm8 0x96 is a three-way XOR: merge the first three sets of products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load 128 full input bytes plus the k1-masked, zero-filled remainder.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply [rsp+rbx+192] by [rsp+1216].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators (they are read as well as
	; written): zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds (keys at rcx+128 .. rcx+176), final round rcx+192.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the key stream with the loaded input to form the output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = last output lane before masking; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm4,3
	; Destination base comes from the frame slot at rbp+120.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be multiplied below.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = shuffled last block, consumed after the reduction when r13 != 0.
	vextracti32x4	xmm7,zmm20,3
	; r13 -= bytes covered by the first 11 blocks.
	sub	r13,16 * (12 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_205





	; Last block is complete: r13 becomes r13 - 16 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all 12 blocks by the values at rdx+160, rdx+224 and rdx+288.
	; NOTE(review): presumably hash-key powers stored in descending order up
	; to rdx+336 -- confirm against the context layout.
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are no longer needed as inputs and are reused as scratch
	; for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Add the accumulators zmm26 (cross), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Split the cross terms across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 using the POLY2 constant; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_205
$L$_small_initial_partial_block_205:








	; Last block is short: record its length in [r8] and save xmm11 at
	; [rdx+16]. Only the 11 complete blocks are multiplied here (by the
	; values at rdx+176, rdx+240 and rdx+304..rdx+351); the short block is
	; added afterwards via xmm7.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Build a three-lane multiplier: the YMM load fills lanes 0-1 and zeroes
	; the rest, the insert fills lane 2. Lane 3 of zmm1 stays zero, so the
	; short block in lane 3 of zmm20 contributes nothing to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same lane fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_205:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	; NOTE(review): presumably its multiplication is deferred to later
	; code -- confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_205
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_205:
	jmp	NEAR $L$_last_blocks_done_181
; ----------------------------------------------------------------------
; Tail case: 13 counter blocks (zmm0 + zmm3 + zmm4 + one lane in xmm5).
; The 13th block may be shorter than 16 bytes; k1 masks the fourth vector's
; load and store.
; The AES part uses the round keys at rcx+0 .. rcx+192 (same 13-key sequence
; as $L$aes_192_0 at the top of the file) and is interleaved with carry-less
; multiplies whose partial products are XOR-accumulated into zmm24/25/26.
; NOTE(review): register roles below are inferred from their use in this
; block only; confirm against the enclosing function's prologue:
;   r13 = bytes left, r9+r11 = source, [rbp+120]+r11 = destination,
;   rdx = context with multiplier table, r8 = where the leftover length goes,
;   zmm2 = current counter, zmm27/zmm28 = counter increments,
;   zmm29 = byte-shuffle mask, xmm14 = resulting hash value.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_13_181:
	; k1 = 64-bit mask read from byte64_len_to_mask_table[r13 - 192]
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 243 (= 256 - 13) selects the path that byte-shuffles the
	; counter around the additions. NOTE(review): presumably r15d is the
	; counter's low byte and this avoids a carry out of it -- confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_206
	; Fast path: derive the four counter vectors directly from zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_206

$L$_16_blocks_overflow_206:
	; Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back.
	; zmm5 first holds the ddq_add_4444 increment and is then overwritten
	; with the fourth counter vector.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
$L$_16_blocks_ok_206:




	; zmm30/zmm31 alternate as broadcast round-key registers from here on.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Lane 0 of zmm5 is the last counter block used here; replicate it into
	; all four lanes of zmm2 as the new current counter.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply [rsp+rbx] by [rsp+1024]: high, low and the two cross products.
	; NOTE(review): presumably buffered blocks times hash-key powers saved on
	; the stack by earlier code -- confirm.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply [rsp+rbx+64] by [rsp+1088].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply [rsp+rbx+128] by [rsp+1152].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm8 0x96 is a three-way XOR: merge the first three sets of products.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load 192 full input bytes plus the k1-masked, zero-filled 13th block.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply [rsp+rbx+192] by [rsp+1216].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators (they are read as well as
	; written): zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross parts.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds (keys at rcx+128 .. rcx+176), final round rcx+192.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	; XOR the key stream with the loaded input to form the output blocks.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; xmm11 = last output lane before masking; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm5,0
	; Destination base comes from the frame slot at rbp+120.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes outside k1, then shuffle the output blocks with zmm29
	; so they can be multiplied below.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	; xmm7 = shuffled last block, consumed after the reduction when r13 != 0.
	vextracti32x4	xmm7,zmm21,0
	; r13 -= bytes covered by the first 12 blocks.
	sub	r13,16 * (13 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_207





	; Last block is complete: r13 becomes r13 - 16 and [r8] is cleared.
	sub	r13,16
	mov	QWORD[r8],0
	; Multiply all 13 blocks by the values at rdx+144, rdx+208, rdx+272 and
	; rdx+336.
	; NOTE(review): presumably hash-key powers stored in descending order up
	; to rdx+336 -- confirm against the context layout.
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are no longer needed as inputs and are reused as scratch
	; for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; XMM-width multiplies for the 13th block: the upper lanes of zmm0/3/4/5
	; are zeroed by the narrower write, so the ZMM-wide XORs that follow
	; remain correct.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	; Add the accumulators zmm24 (high), zmm25 (low), zmm26 (cross).
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms across the high/low halves, then XOR the four
	; 128-bit lanes of each half together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce xmm0:xmm3 using the POLY2 constant; the result lands in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_207
$L$_small_initial_partial_block_207:








	; Last block is short: record its length in [r8] and save xmm11 at
	; [rdx+16]. Only the 12 complete blocks are multiplied here (by the
	; values at rdx+160, rdx+224 and rdx+288); the short block is added
	; afterwards via xmm7.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; zmm17/zmm19 are reused as scratch for the third vector's products.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Add the accumulators zmm26 (cross), zmm24 (high), zmm25 (low).
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Same lane fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_207:

	; If r13 is non-zero, XOR the shuffled last block (xmm7) into xmm14.
	; NOTE(review): presumably its multiplication is deferred to later
	; code -- confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_207
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_207:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail handler: 14 blocks remain (13 full 16-byte blocks plus a 14th
; that may be shorter than 16 bytes).
;
; Registers as they are used below (role names are inferred from usage;
; confirm against the generator script):
;   rcx          base of the AES round keys; offsets 0..192 are read
;   r9 + r11     input address, r11 being the running offset
;   [120+rbp]    output base pointer (loaded into r10), indexed by r11
;   r13          appears to be the remaining byte count
;   r15d         compared with 256-14 to pick the counter-increment path
;   zmm2         counter input; replaced by the last counter block used
;   zmm27/zmm28  increments added on the fast counter path
;   zmm29        vpshufb control for every byte shuffle in this block
;   rsp, rbx     [1024..1216+rsp] and [rbx+rsp ..] hold the operands of
;                the multiplies interleaved with the AES rounds
;   zmm24/25/26  running high / low / middle product accumulators
;   rdx          table base: entries at 128..336 are multiplier operands,
;                [16+rdx] receives the last output block when partial
;   r8           points at a qword set to 0 or to the partial length
; Result: xmm14 = reduced 128-bit value; exits via $L$_last_blocks_done_181.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_14_181:
	; k1 = 64-bit byte mask for the 4th 64-byte vector, taken from
	; byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 242 (= 256 - 14) selects the slower increment path below.
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_208
	; Fast path: derive the 14 counter blocks with plain dword adds.
	; The last vector only needs two lanes, hence the ymm form.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_208

$L$_16_blocks_overflow_208:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	; ddq_add_4444 per vector, and shuffle every result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
$L$_16_blocks_ok_208:




	; From here on AES rounds on zmm0/zmm3/zmm4/ymm5 are interleaved with
	; carry-less multiplies of data previously placed on the stack.
	; Round keys are broadcast alternately into zmm30 and zmm31.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 14th counter block (lane 1 of zmm5) and replicate it to all
	; four lanes of zmm2.
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Round 0: XOR with the round key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for [1024+rsp] x [rbx+rsp]:
	; 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	; AES round with the key at [16+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for [1088+rsp] x [64+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	; AES round with the key at [32+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for [1152+rsp] x [128+rbx+rsp].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round with the key at [48+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Merge the first three product sets (imm 0x96 = three-way XOR).
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round with the key at [64+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: three full vectors, then a masked (zeroing) load of
	; the last one so bytes beyond the message are not read into ymm21.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	; AES round with the key at [80+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for [1216+rsp] x [192+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round with the key at [96+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators:
	; zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds, keys at [112+rcx] .. [176+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	; Final round uses the key at [192+rcx].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	; XOR the key stream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	; xmm11 = unmasked 14th output block; only the partial-block path
	; stores it (to [16+rdx]).
	vextracti32x4	xmm11,zmm5,1
	; Store the output; the last vector is written under mask k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes of zmm5 outside the mask, then byte-shuffle all four
	; output vectors with zmm29 for the multiplies below.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	; xmm7 = shuffled, zero-padded 14th block.
	vextracti32x4	xmm7,zmm21,1
	; r13 -= 208: what is left belongs to the 14th block.
	sub	r13,16 * (14 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_209





	; 14th block is complete: record 0 at [r8] and multiply all 14 blocks
	; by the table entries at [128+rdx] .. [336+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; The last two blocks use the two entries starting at [320+rdx].
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	; Add the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms over the high (zmm0) and low (zmm3) sums, then
	; XOR the four 128-bit lanes of each sum down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit result xmm0:xmm3 to 128 bits in xmm14 using the
	; constant at POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_209
$L$_small_initial_partial_block_209:








	; 14th block is shorter than 16 bytes: record its length at [r8] and
	; save the unmasked output block at [16+rdx]. Only the 13 complete
	; blocks are multiplied here (entries at [144+rdx] .. [336+rdx]).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only lane 0 of zmm21 (the 13th block) takes part.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_209:

	; If bytes of a partial block remain (r13 != 0), XOR the zero-padded
	; last block into xmm14 without multiplying it -- presumably the
	; multiply is deferred until the block is completed; confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_209
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_209:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail handler: 15 blocks remain (14 full 16-byte blocks plus a 15th
; that may be shorter than 16 bytes).
;
; Registers as they are used below (role names are inferred from usage;
; confirm against the generator script):
;   rcx          base of the AES round keys; offsets 0..192 are read
;   r9 + r11     input address, r11 being the running offset
;   [120+rbp]    output base pointer (loaded into r10), indexed by r11
;   r13          appears to be the remaining byte count
;   r15d         compared with 256-15 to pick the counter-increment path
;   zmm2         counter input; replaced by the last counter block used
;   zmm27/zmm28  increments added on the fast counter path
;   zmm29        vpshufb control for every byte shuffle in this block
;   rsp, rbx     [1024..1216+rsp] and [rbx+rsp ..] hold the operands of
;                the multiplies interleaved with the AES rounds
;   zmm24/25/26  running high / low / middle product accumulators
;   rdx          table base: entries at 112..336 are multiplier operands,
;                [16+rdx] receives the last output block when partial
;   r8           points at a qword set to 0 or to the partial length
; Result: xmm14 = reduced 128-bit value; exits via $L$_last_blocks_done_181.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_15_181:
	; k1 = 64-bit byte mask for the 4th 64-byte vector, taken from
	; byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 241 (= 256 - 15) selects the slower increment path below.
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_210
	; Fast path: derive the counter blocks with plain dword adds.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_210

$L$_16_blocks_overflow_210:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	; ddq_add_4444 per vector, and shuffle every result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_210:




	; From here on AES rounds on zmm0/zmm3/zmm4/zmm5 are interleaved with
	; carry-less multiplies of data previously placed on the stack.
	; Round keys are broadcast alternately into zmm30 and zmm31.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 15th counter block (lane 2 of zmm5) and replicate it to all
	; four lanes of zmm2.
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Round 0: XOR with the round key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for [1024+rsp] x [rbx+rsp]:
	; 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	; AES round with the key at [16+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for [1088+rsp] x [64+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	; AES round with the key at [32+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for [1152+rsp] x [128+rbx+rsp].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round with the key at [48+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Merge the first three product sets (imm 0x96 = three-way XOR).
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round with the key at [64+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: three full vectors, then a masked (zeroing) load of
	; the last one so bytes beyond the message are not read into zmm21.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	; AES round with the key at [80+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for [1216+rsp] x [192+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round with the key at [96+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators:
	; zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds, keys at [112+rcx] .. [176+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Final round uses the key at [192+rcx].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; XOR the key stream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = unmasked 15th output block; only the partial-block path
	; stores it (to [16+rdx]).
	vextracti32x4	xmm11,zmm5,2
	; Store the output; the last vector is written under mask k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside the mask, then byte-shuffle all four
	; output vectors with zmm29 for the multiplies below.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled, zero-padded 15th block.
	vextracti32x4	xmm7,zmm21,2
	; r13 -= 224: what is left belongs to the 15th block.
	sub	r13,16 * (15 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_211





	; 15th block is complete: record 0 at [r8] and multiply all 15 blocks
	; by the table entries at [112+rdx] .. [336+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Build a three-entry operand: two entries from [304+rdx] in lanes
	; 0-1 and the entry at [336+rdx] in lane 2 (lane 3 is left zero by the
	; ymm load).
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Add the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms over the high (zmm0) and low (zmm3) sums, then
	; XOR the four 128-bit lanes of each sum down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit result xmm0:xmm3 to 128 bits in xmm14 using the
	; constant at POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_211
$L$_small_initial_partial_block_211:








	; 15th block is shorter than 16 bytes: record its length at [r8] and
	; save the unmasked output block at [16+rdx]. Only the 14 complete
	; blocks are multiplied here (entries at [128+rdx] .. [336+rdx]).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only lanes 0-1 of zmm21 (blocks 13 and 14) take part.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Same fold and POLY2 reduction as on the full-block path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_211:

	; If bytes of a partial block remain (r13 != 0), XOR the zero-padded
	; last block into xmm14 without multiplying it -- presumably the
	; multiply is deferred until the block is completed; confirm.
	or	r13,r13
	je	NEAR $L$_after_reduction_211
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_211:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail handler: 16 blocks remain (15 full 16-byte blocks plus a 16th).
; Unlike the 14- and 15-block handlers there is no "last block is
; complete" branch: the 16th block always goes through the
; partial-block bookkeeping ([r8], [16+rdx]) and is XORed into xmm14
; unmultiplied. NOTE(review): presumably the dispatcher only sends
; lengths that end in a short block here -- confirm against the caller.
;
; Registers as they are used below (role names are inferred from usage;
; confirm against the generator script):
;   rcx          base of the AES round keys; offsets 0..192 are read
;   r9 + r11     input address, r11 being the running offset
;   [120+rbp]    output base pointer (loaded into r10), indexed by r11
;   r13          appears to be the remaining byte count
;   r15d         compared with 256-16 to pick the counter-increment path
;   zmm2         counter input; replaced by the last counter block used
;   zmm27/zmm28  increments added on the fast counter path
;   zmm29        vpshufb control for every byte shuffle in this block
;   zmm24/25/26  running high / low / middle product accumulators
;   rdx          table base: entries at 112..336 are multiplier operands
;   r8           points at a qword that receives the partial length
; Result: xmm14 = reduced 128-bit value; exits via $L$_last_blocks_done_181.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_16_181:
	; k1 = 64-bit byte mask for the 4th 64-byte vector, taken from
	; byte64_len_to_mask_table[r13 - 192] (8-byte entries).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 240 (= 256 - 16) selects the slower increment path below.
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_212
	; Fast path: derive the counter blocks with plain dword adds.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_212

$L$_16_blocks_overflow_212:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	; ddq_add_4444 per vector, and shuffle every result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_212:




	; From here on AES rounds on zmm0/zmm3/zmm4/zmm5 are interleaved with
	; carry-less multiplies of data previously placed on the stack.
	; Round keys are broadcast alternately into zmm30 and zmm31.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Keep the 16th counter block (lane 3 of zmm5) and replicate it to all
	; four lanes of zmm2.
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]
	; Round 0: XOR with the round key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Products for [1024+rsp] x [rbx+rsp]:
	; 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]
	; AES round with the key at [16+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Products for [1088+rsp] x [64+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]
	; AES round with the key at [32+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Products for [1152+rsp] x [128+rbx+rsp].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round with the key at [48+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Merge the first three product sets (imm 0x96 = three-way XOR).
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round with the key at [64+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: three full vectors, then a masked (zeroing) load of
	; the last one so bytes beyond the message are not read into zmm21.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	; AES round with the key at [80+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Products for [1216+rsp] x [192+rbx+rsp].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round with the key at [96+rcx].
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold everything into the running accumulators:
	; zmm24 ^= high parts, zmm25 ^= low parts, zmm26 ^= cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm24,zmm14,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	; Remaining AES rounds, keys at [112+rcx] .. [176+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Final round uses the key at [192+rcx].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; XOR the key stream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = unmasked 16th output block, stored to [16+rdx] below.
	vextracti32x4	xmm11,zmm5,3
	; Store the output; the last vector is written under mask k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside the mask, then byte-shuffle all four
	; output vectors with zmm29 for the multiplies below.
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled, zero-padded 16th block.
	vextracti32x4	xmm7,zmm21,3
	; r13 -= 240: what is left belongs to the 16th block.
	sub	r13,16 * (16 - 1)
$L$_small_initial_partial_block_213:








	; Record the 16th block's length at [r8] and save the unmasked output
	; block at [16+rdx]. The first 15 blocks are multiplied by the table
	; entries at [112+rdx] .. [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Three-entry operand: lanes 0-1 from [304+rdx], lane 2 from
	; [336+rdx]; lane 3 stays zero, so the 16th block in lane 3 of zmm21
	; contributes nothing to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	; Add the running accumulators zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms over the high (zmm0) and low (zmm3) sums, then
	; XOR the four 128-bit lanes of each sum down to one.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit result xmm0:xmm3 to 128 bits in xmm14 using the
	; constant at POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_213:
	; Unconditionally XOR the zero-padded 16th block into xmm14.
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_213:
	jmp	NEAR $L$_last_blocks_done_181
; ---------------------------------------------------------------------
; Tail handler: no blocks remain. No AES work is done; the four 64-byte
; vectors at [1024+rsp] .. [1216+rsp] are carry-less multiplied by the
; four vectors at [rbx+rsp] .. [192+rbx+rsp] (presumably saved blocks
; times their matching hash-key powers -- confirm), added to the running
; accumulators zmm24 (high), zmm25 (low) and zmm26 (middle), and the sum
; is reduced to 128 bits in xmm14.
; Falls through into the label that follows this block.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_0_181:
	; First pair of vectors: all four 64x64 partial products each
	; (0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms).
	vmovdqa64	zmm13,ZMMWORD[1024+rsp]
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1088+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; Accumulate (imm 0x96 = three-way XOR); zmm26 takes both cross terms.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Second pair of vectors, handled the same way.
	vmovdqa64	zmm13,ZMMWORD[1152+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1216+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle sum over the high (zmm24) and low (zmm25) sums,
	; then XOR the four 128-bit lanes of each down to one.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	; Reduce xmm24:xmm25 to 128 bits in xmm14 using the constant at POLY2.
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Common exit of the tail handlers with suffix _181: byte-shuffle the
; counter block in xmm2 with xmm29 (the same shuffle control the handlers
; use), then continue at $L$_ghash_done_172.
$L$_last_blocks_done_181:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_172
$L$_encrypt_32_blocks_172:
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_214
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_214
$L$_16_blocks_overflow_214:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_214:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rsp]




	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_215
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_215
$L$_16_blocks_overflow_215:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_215:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1024+rsp]
	vmovdqu64	zmm1,ZMMWORD[256+rsp]




	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[320+rsp]
	vmovdqa64	zmm22,ZMMWORD[1088+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[384+rsp]
	vmovdqa64	zmm8,ZMMWORD[1152+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[448+rsp]
	vmovdqa64	zmm22,ZMMWORD[1216+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[256+r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[320+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[384+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[448+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm26,zmm10,zmm15,0x96
	vpternlogq	zmm24,zmm6,zmm12,0x96
	vpternlogq	zmm25,zmm7,zmm13,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[256+r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[320+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[384+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[448+r11*1+r10],zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[768+rsp],zmm0
	vmovdqa64	ZMMWORD[832+rsp],zmm3
	vmovdqa64	ZMMWORD[896+rsp],zmm4
	vmovdqa64	ZMMWORD[960+rsp],zmm5
	vmovdqa64	zmm13,ZMMWORD[1280+rsp]
	vmovdqu64	zmm12,ZMMWORD[512+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1344+rsp]
	vmovdqu64	zmm12,ZMMWORD[576+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[1408+rsp]
	vmovdqu64	zmm12,ZMMWORD[640+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1472+rsp]
	vmovdqu64	zmm12,ZMMWORD[704+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Tail dispatch.  r13 (the byte count the handlers compare against 16) drops
; by 512 and r11 (the offset the handlers add to the r9 / r10 data pointers)
; grows by 512.
	sub	r13,512
	add	r11,512
; ebx = 512 - (r13d with its low four bits cleared).  The handlers use it as
; a byte offset from rsp ([rbx*1+rsp]).
	mov	r10d,r13d
	and	r10d,~15
	mov	ebx,512
	sub	ebx,r10d
; r10d = (r13d + 15) >> 4: the number of 16-byte blocks in r13d bytes, a
; trailing partial block counting as one.  The je consumes ZF from the shr.
	mov	r10d,r13d
	add	r10d,15
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_216

; Compare tree on r10d; each je/jb/ja pair reuses the flags of the single cmp
; above it.  First split at 8.
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_216
	jb	NEAR $L$_last_num_blocks_is_7_1_216


; r10d > 8: split at 12.
	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_216
	jb	NEAR $L$_last_num_blocks_is_11_9_216


; r10d > 12: 15, above 15 (handled as 16), 14, otherwise 13.
	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_216
	ja	NEAR $L$_last_num_blocks_is_16_216
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_216
	jmp	NEAR $L$_last_num_blocks_is_13_216

; r10d in 9..11.
$L$_last_num_blocks_is_11_9_216:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_216
	ja	NEAR $L$_last_num_blocks_is_11_216
	jmp	NEAR $L$_last_num_blocks_is_9_216

; r10d in 1..7.
$L$_last_num_blocks_is_7_1_216:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_216
	jb	NEAR $L$_last_num_blocks_is_3_1_216

	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_216
	je	NEAR $L$_last_num_blocks_is_6_216
	jmp	NEAR $L$_last_num_blocks_is_5_216

; r10d in 1..3.  When r10d == 1 neither branch is taken and execution falls
; through to the code that follows.
$L$_last_num_blocks_is_3_1_216:

	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_216
	je	NEAR $L$_last_num_blocks_is_2_216
; Tail handler for a single remaining 16-byte block (possibly partial).
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [336+rdx] is read as a multiplier, [16+rdx] is written
;   r8       receives 0 (block complete) or r13 (block partial)
;   r9+r11   source address of the block, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 counter increment, zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_1_216:
; k1 = 8-byte entry number r13 of byte64_len_to_mask_table; it masks the
; byte-granular load and store of the data block below.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; Counter increment.  Below the threshold zmm28 is added directly; otherwise
; the counter is byte-shuffled with zmm29, ddq_add_1234 is added, and the
; result is shuffled back.
; NOTE(review): r15d presumably tracks the low counter byte so that the
; direct add cannot carry out of it - confirm.
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_217
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_217

$L$_16_blocks_overflow_217:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
$L$_16_blocks_ok_217:




; From here AES rounds on the counter block in xmm0 are interleaved with
; carry-less multiplies of the four 64-byte vectors saved at [768+rsp]..
; [960+rsp] by the values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 0 of zmm0 (the counter block used here) is copied to every 128-bit
; lane of zmm2.
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zero-filling load of the data block from [r9+r11].
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total, then vaesenclast with the key at
; [192+rcx].
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vaesenclast	xmm0,xmm0,xmm30
; Result = encrypted counter XOR input.  xmm11 keeps the unmasked result
; block; the masked store goes to [r10+r11] with r10 loaded from [120+rbp].
	vpxorq	xmm0,xmm0,xmm17
	vextracti32x4	xmm11,zmm0,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
; Bytes outside k1 are zeroed, then the block is byte-shuffled with zmm29
; into xmm17; xmm7 receives a copy of it.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	vextracti32x4	xmm7,zmm17,0


; Fewer than 16 bytes left means the block is partial.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_218





; Complete block: [r8] = 0, multiply the new block by [336+rdx], add the
; accumulators and reduce.
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
; xmm14 = new running hash.
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_218
$L$_small_initial_partial_block_218:








; Partial block: [r8] = r13 and the unmasked result block goes to [16+rdx].
; NOTE(review): presumably saved so that a later call can finish this block -
; confirm against the context layout.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


; Only the accumulators are reduced here; the new block is not multiplied.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm0,XMMWORD[POLY2]


	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












; The shuffled partial block is XORed into the hash without a multiply.
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_218
$L$_small_initial_compute_done_218:
$L$_after_reduction_218:
	jmp	NEAR $L$_last_blocks_done_216
; Tail handler for two remaining 16-byte blocks (the second possibly
; partial), processed in ymm registers.
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [320+rdx] / [336+rdx] are read as multipliers,
;            [16+rdx] is written on the partial path
;   r8       receives 0 (last block complete) or r13 (last block partial)
;   r9+r11   source address, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 counter increment, zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_2_216:
; k1 = 8-byte entry number r13 of byte64_len_to_mask_table; it masks the
; byte-granular load and store of the data below.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; Counter increment: direct add of zmm28 below the threshold, otherwise the
; add is done between two zmm29 byte shuffles.
; NOTE(review): r15d presumably tracks the low counter byte - confirm.
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_219
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_219

$L$_16_blocks_overflow_219:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
$L$_16_blocks_ok_219:




; AES rounds on the two counter blocks in ymm0 are interleaved with
; carry-less multiplies of the four 64-byte vectors saved at [768+rsp]..
; [960+rsp] by the values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 1 of zmm0 (the last counter block used here) is copied to every
; 128-bit lane of zmm2.
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zero-filling load of the data from [r9+r11].
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total, then vaesenclast with the key at
; [192+rcx].
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vaesenclast	ymm0,ymm0,ymm30
; Result = encrypted counters XOR input.  xmm11 keeps the unmasked last
; result block; the masked store goes to [r10+r11] with r10 from [120+rbp].
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm11,zmm0,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
; Bytes outside k1 are zeroed, then the blocks are byte-shuffled with zmm29
; into ymm17; xmm7 receives the last (possibly partial) shuffled block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	vextracti32x4	xmm7,zmm17,1
; r13 = bytes belonging to the last block.
	sub	r13,16 * (2 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_220





; Last block complete: [r8] = 0 and both blocks are multiplied by the 32
; bytes at [320+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_220
$L$_small_initial_partial_block_220:








; Last block partial: [r8] = r13, the unmasked last result block goes to
; [16+rdx], and only the first block (low lane of zmm17) is multiplied, by
; [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_220:

; r13 is 0 after the complete-block path and non-zero after the partial path;
; only in the latter case is the shuffled partial block XORed into the hash.
	or	r13,r13
	je	NEAR $L$_after_reduction_220
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_220:
	jmp	NEAR $L$_last_blocks_done_216
; Tail handler for three remaining 16-byte blocks (the third possibly
; partial), processed in zmm registers.
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [304+rdx]..[336+rdx] are read as multipliers,
;            [16+rdx] is written on the partial path
;   r8       receives 0 (last block complete) or r13 (last block partial)
;   r9+r11   source address, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 counter increment, zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_3_216:
; k1 = 8-byte entry number r13 of byte64_len_to_mask_table; it masks the
; byte-granular load and store of the data below.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; Counter increment: direct add of zmm28 below the threshold, otherwise the
; add is done between two zmm29 byte shuffles.
; NOTE(review): r15d presumably tracks the low counter byte - confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_221
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_221

$L$_16_blocks_overflow_221:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_221:




; AES rounds on the counter blocks in zmm0 are interleaved with carry-less
; multiplies of the four 64-byte vectors saved at [768+rsp]..[960+rsp] by the
; values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 2 of zmm0 (the last counter block used here) is copied to every
; 128-bit lane of zmm2.
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zero-filling load of the data from [r9+r11].
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total, then vaesenclast with the key at
; [192+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
; Result = encrypted counters XOR input.  xmm11 keeps the unmasked last
; result block; the masked store goes to [r10+r11] with r10 from [120+rbp].
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
; Bytes outside k1 are zeroed, then the blocks are byte-shuffled with zmm29
; into zmm17; xmm7 receives the last (possibly partial) shuffled block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,2
; r13 = bytes belonging to the last block.
	sub	r13,16 * (3 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_222





; Last block complete: [r8] = 0.  The multiplier is built from the 32 bytes
; at [304+rdx] (lanes 0-1) and the 16 bytes at [336+rdx] (lane 2).
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_222
$L$_small_initial_partial_block_222:








; Last block partial: [r8] = r13, the unmasked last result block goes to
; [16+rdx], and only the first two blocks (ymm17) are multiplied, by the 32
; bytes at [320+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_222:

; r13 is 0 after the complete-block path and non-zero after the partial path;
; only in the latter case is the shuffled partial block XORed into the hash.
	or	r13,r13
	je	NEAR $L$_after_reduction_222
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_222:
	jmp	NEAR $L$_last_blocks_done_216
; Tail handler for four remaining 16-byte blocks (the fourth possibly
; partial), processed in one zmm register.
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [288+rdx]..[336+rdx] are read as multipliers,
;            [16+rdx] is written on the partial path
;   r8       receives 0 (last block complete) or r13 (last block partial)
;   r9+r11   source address, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 counter increment, zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_4_216:
; k1 = 8-byte entry number r13 of byte64_len_to_mask_table; it masks the
; byte-granular load and store of the data below.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
; Counter increment: direct add of zmm28 below the threshold, otherwise the
; add is done between two zmm29 byte shuffles.
; NOTE(review): r15d presumably tracks the low counter byte - confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_223
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_223

$L$_16_blocks_overflow_223:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_223:




; AES rounds on the counter blocks in zmm0 are interleaved with carry-less
; multiplies of the four 64-byte vectors saved at [768+rsp]..[960+rsp] by the
; values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 3 of zmm0 (the last counter block used here) is copied to every
; 128-bit lane of zmm2.
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zero-filling load of the data from [r9+r11].
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total, then vaesenclast with the key at
; [192+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
; Result = encrypted counters XOR input.  xmm11 keeps the unmasked last
; result block; the masked store goes to [r10+r11] with r10 from [120+rbp].
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
; Bytes outside k1 are zeroed, then the blocks are byte-shuffled with zmm29
; into zmm17; xmm7 receives the last (possibly partial) shuffled block.
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,3
; r13 = bytes belonging to the last block.
	sub	r13,16 * (4 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_224





; Last block complete: [r8] = 0 and all four blocks are multiplied by the 64
; bytes at [288+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

; The new products are added to the accumulators.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_224
$L$_small_initial_partial_block_224:








; Last block partial: [r8] = r13 and the unmasked last result block goes to
; [16+rdx].  The multiplier covers three lanes only: 32 bytes from [304+rdx]
; plus 16 bytes from [336+rdx] in lane 2.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_224:

; r13 is 0 after the complete-block path and non-zero after the partial path;
; only in the latter case is the shuffled partial block XORed into the hash.
	or	r13,r13
	je	NEAR $L$_after_reduction_224
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_224:
	jmp	NEAR $L$_last_blocks_done_216
; Tail handler for five remaining 16-byte blocks (the fifth possibly
; partial): four blocks in zmm0 plus one block in xmm3.
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [272+rdx]..[336+rdx] are read as multipliers,
;            [16+rdx] is written on the partial path
;   r8       receives 0 (last block complete) or r13 (last block partial)
;   r9+r11   source address, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 / zmm27 counter increments,
;            zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_5_216:
; k1 = 8-byte entry number (r13 - 64) of byte64_len_to_mask_table.  The first
; 64 bytes are moved unmasked, so the mask covers only the second vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Counter increment: direct adds of zmm28 / zmm27 below the threshold,
; otherwise the adds (ddq_add_1234, ddq_add_4444) are done between zmm29 byte
; shuffles.
; NOTE(review): r15d presumably tracks the low counter byte - confirm.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_225
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_225

$L$_16_blocks_overflow_225:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_225:




; AES rounds on the counter blocks in zmm0 / xmm3 are interleaved with
; carry-less multiplies of the four 64-byte vectors saved at [768+rsp]..
; [960+rsp] by the values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 0 of zmm3 (the last counter block used here) is copied to every
; 128-bit lane of zmm2.
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input load from [r9+r11]: 64 bytes unmasked, then a masked, zero-filling
; load of the fifth block.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total per register, then vaesenclast
; with the key at [192+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
; Result = encrypted counters XOR input.  xmm11 keeps the unmasked last
; result block; stores go to [r10+r11] with r10 from [120+rbp], the second
; one masked by k1.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	vextracti32x4	xmm11,zmm3,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
; Bytes of zmm3 outside k1 are zeroed, then the result blocks are
; byte-shuffled with zmm29 into zmm17 / xmm19; xmm7 receives the last
; (possibly partial) shuffled block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	vextracti32x4	xmm7,zmm19,0
; r13 = bytes belonging to the last block.
	sub	r13,16 * (5 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_226





; Last block complete: [r8] = 0.  Blocks 1-4 are multiplied by the 64 bytes
; at [272+rdx] and block 5 by the 16 bytes at [336+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

; Both product sets are added to the accumulators.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_226
$L$_small_initial_partial_block_226:








; Last block partial: [r8] = r13, the unmasked last result block goes to
; [16+rdx], and only blocks 1-4 (zmm17) are multiplied, by the 64 bytes at
; [288+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_226:

; r13 is 0 after the complete-block path and non-zero after the partial path;
; only in the latter case is the shuffled partial block XORed into the hash.
	or	r13,r13
	je	NEAR $L$_after_reduction_226
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_226:
	jmp	NEAR $L$_last_blocks_done_216
; Tail handler for six remaining 16-byte blocks (the sixth possibly
; partial): four blocks in zmm0 plus two blocks in ymm3.
; Register usage visible in this block:
;   rcx      base of the round keys, read at offsets 0,16,...,192
;   rdx      context: [256+rdx]..[336+rdx] are read as multipliers,
;            [16+rdx] is written on the partial path
;   r8       receives 0 (last block complete) or r13 (last block partial)
;   r9+r11   source address, r10+r11 destination address
;   r13      remaining byte count, rbx offset into the rsp-based table
;   zmm2     counter block source, zmm28 / zmm27 counter increments,
;            zmm29 byte shuffle
;   xmm14    running hash value on entry and on exit
; NOTE(review): semantic names (GHASH, hash-key powers, GCM context layout)
; are inferred from the instruction pattern - confirm against the generator.
$L$_last_num_blocks_is_6_216:
; k1 = 8-byte entry number (r13 - 64) of byte64_len_to_mask_table.  The first
; 64 bytes are moved unmasked, so the mask covers only the second vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Counter increment: direct adds of zmm28 / zmm27 below the threshold,
; otherwise the adds (ddq_add_1234, ddq_add_4444) are done between zmm29 byte
; shuffles.
; NOTE(review): r15d presumably tracks the low counter byte - confirm.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_227
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_227

$L$_16_blocks_overflow_227:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_227:




; AES rounds on the counter blocks in zmm0 / ymm3 are interleaved with
; carry-less multiplies of the four 64-byte vectors saved at [768+rsp]..
; [960+rsp] by the values at [rbx+rsp]..[192+rbx+rsp].
; zmm8 = running hash (zmm14) XOR first saved vector.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 1 of zmm3 (the last counter block used here) is copied to every
; 128-bit lane of zmm2.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round key 0 is XORed in; zmm30 / zmm31 alternate as the holder of the next
; round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Partial products for vector 0: 0x11 = high*high, 0x00 = low*low,
; 0x01 / 0x10 = the two cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Partial products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Partial products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XORs (truth table 0x96) merge the products of vectors 0..2:
; zmm14 = high, zmm7 = low, zmm10 / zmm11 = cross terms.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Input load from [r9+r11]: 64 bytes unmasked, then a masked, zero-filling
; load of blocks 5-6.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Partial products for vector 3.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Unreduced accumulators: zmm24 = high, zmm25 = low, zmm26 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: eleven vaesenc in total per register, then vaesenclast
; with the key at [192+rcx].
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
; Result = encrypted counters XOR input.  xmm11 keeps the unmasked last
; result block; stores go to [r10+r11] with r10 from [120+rbp], the second
; one masked by k1.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	vextracti32x4	xmm11,zmm3,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
; Bytes of zmm3 outside k1 are zeroed, then the result blocks are
; byte-shuffled with zmm29 into zmm17 / ymm19; xmm7 receives the last
; (possibly partial) shuffled block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	vextracti32x4	xmm7,zmm19,1
; r13 = bytes belonging to the last block.
	sub	r13,16 * (6 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_228





; Last block complete: [r8] = 0.  Blocks 1-4 are multiplied by the 64 bytes
; at [256+rdx] and blocks 5-6 by the 32 bytes at [320+rdx].
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

; Both product sets are added to the accumulators.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; The middle term is split across the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; The four 128-bit lanes of each half are XOR-folded into lane 0.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of high:low (xmm0:xmm3) with the constant POLY2.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_228
$L$_small_initial_partial_block_228:








; Last block partial: [r8] = r13 and the unmasked last result block goes to
; [16+rdx].  Blocks 1-4 are multiplied by the 64 bytes at [272+rdx] and only
; block 5 (low lane of zmm19) by the 16 bytes at [336+rdx].
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_228:

; r13 is 0 after the complete-block path and non-zero after the partial path;
; only in the latter case is the shuffled partial block XORed into the hash.
	or	r13,r13
	je	NEAR $L$_after_reduction_228
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_228:
	jmp	NEAR $L$_last_blocks_done_216
; ---------------------------------------------------------------------
; Tail handler: 7 blocks left (six whole 16-byte blocks plus a 7th that
; may be shorter than 16 bytes).
; Register roles as used in this section (inferred from the code below;
; NOTE(review): confirm against the prologue, which is outside this view):
;   rcx        base of the round-key array; keys at offsets 0..192 are used
;   rdx        context block: [16+rdx] is written on the partial path and
;              [240..336+rdx] is read as GHASH multiplier data
;   r8         pointer to a QWORD that receives the leftover byte count
;   r9+r11     input address; [120+rbp] holds the output base (also +r11)
;   r13        byte count still to be processed
;   r15d       compared against 256-7 to detect counter wrap
;   zmm2       counter block(s); xmm14 in/out hash accumulator (presumably)
;   zmm29      byte-shuffle mask
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_7_216:
; k1 = byte mask for the second 64-byte vector, looked up by (r13 - 64).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; 249 = 256 - 7. At or above this the slow path is taken; presumably r15d
; tracks the low counter byte and adding 7 would carry out of it.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_229
; Fast path: add the increments held in zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_229

$L$_16_blocks_overflow_229:
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_229:




; From here the AES rounds on zmm0/zmm3 are interleaved with carry-less
; multiplies of four 64-byte vectors at rsp+768..960 by the four vectors
; at rsp+rbx+0..192 (presumably buffered data and matching hash-key powers).
; The first data vector is XORed with the accumulator zmm14 before use.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 2 of zmm3 is the 7th counter block; replicate it into every lane
; of zmm2 (vshufi64x2 with imm 0 copies lane 0 everywhere).
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products for vector 0: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; vpternlogq with 0x96 is a three-way XOR: fold vectors 0..2 together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: 64 full bytes, then the k1-masked remainder (zero-filled).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products for vector 3, then the final partial sums:
; zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: 11 vaesenc in total plus vaesenclast with the key at
; [192+rcx], i.e. a 12-round schedule.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
; XOR the cipher output with the input to form the result.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
; Keep the unmasked 7th result block in xmm11 (stored to [16+rdx] on the
; partial path below).
	vextracti32x4	xmm11,zmm3,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes beyond the message end, then byte-shuffle the result
; (the data just written is what gets hashed). xmm7 = shuffled 7th block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,2
; r13 = bytes belonging to the 7th block.
	sub	r13,16 * (7 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_230





; Full 7th block: nothing is left over, hash all 7 blocks.
	sub	r13,16
	mov	QWORD[r8],0
; Multipliers for blocks 1-4 come from [240+rdx]; for blocks 5-7 from
; [304+rdx] (32 bytes) plus [336+rdx] inserted as lane 2. Lane 3 of zmm1
; stays zero because the ymm load cleared it.
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

; Merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) sums, then
; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the reduced
; 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_230
$L$_small_initial_partial_block_230:








; Partial 7th block: record the leftover length and the last result block,
; then hash only the six complete blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
; ymm-width operations: the upper 256 bits of zmm4/zmm5/zmm0/zmm3 are
; written as zero, so the zmm-wide merges below remain valid.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Same POLY2 reduction as on the full path; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_230:

; If bytes are left over (r13 != 0), XOR the shuffled partial block into
; the accumulator without multiplying it; the multiply is presumably
; completed by a later call once the block is full.
	or	r13,r13
	je	NEAR $L$_after_reduction_230
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_230:
	jmp	NEAR $L$_last_blocks_done_216
; ---------------------------------------------------------------------
; Tail handler: 8 blocks left (seven whole 16-byte blocks plus an 8th
; that may be shorter than 16 bytes).
; Register roles as used in this section (inferred from the code below;
; NOTE(review): confirm against the prologue, which is outside this view):
;   rcx        base of the round-key array; keys at offsets 0..192 are used
;   rdx        context block: [16+rdx] is written on the partial path and
;              [224..336+rdx] is read as GHASH multiplier data
;   r8         pointer to a QWORD that receives the leftover byte count
;   r9+r11     input address; [120+rbp] holds the output base (also +r11)
;   r13        byte count still to be processed
;   r15d       compared against 256-8 to detect counter wrap
;   zmm2       counter block(s); xmm14 in/out hash accumulator (presumably)
;   zmm29      byte-shuffle mask
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_8_216:
; k1 = byte mask for the second 64-byte vector, looked up by (r13 - 64).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; 248 = 256 - 8. At or above this the slow path is taken; presumably r15d
; tracks the low counter byte and adding 8 would carry out of it.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_231
; Fast path: add the increments held in zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_231

$L$_16_blocks_overflow_231:
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle both results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_231:




; From here the AES rounds on zmm0/zmm3 are interleaved with carry-less
; multiplies of four 64-byte vectors at rsp+768..960 by the four vectors
; at rsp+rbx+0..192 (presumably buffered data and matching hash-key powers).
; The first data vector is XORed with the accumulator zmm14 before use.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 3 of zmm3 is the 8th counter block; replicate it into every lane
; of zmm2 (vshufi64x2 with imm 0 copies lane 0 everywhere).
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products for vector 0: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; vpternlogq with 0x96 is a three-way XOR: fold vectors 0..2 together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: 64 full bytes, then the k1-masked remainder (zero-filled).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products for vector 3, then the final partial sums:
; zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: 11 vaesenc in total plus vaesenclast with the key at
; [192+rcx], i.e. a 12-round schedule.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
; XOR the cipher output with the input to form the result.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
; Keep the unmasked 8th result block in xmm11 (stored to [16+rdx] on the
; partial path below).
	vextracti32x4	xmm11,zmm3,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes beyond the message end, then byte-shuffle the result
; (the data just written is what gets hashed). xmm7 = shuffled 8th block.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vextracti32x4	xmm7,zmm19,3
; r13 = bytes belonging to the 8th block.
	sub	r13,16 * (8 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_232





; Full 8th block: nothing is left over, hash all 8 blocks.
	sub	r13,16
	mov	QWORD[r8],0
; Multipliers for blocks 1-4 come from [224+rdx], for blocks 5-8 from
; [288+rdx]; both are full 64-byte loads.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; Add the two product sets together ...
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

; ... and merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

; Split the cross terms across the high (zmm0) and low (zmm3) sums, then
; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the reduced
; 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_232
$L$_small_initial_partial_block_232:








; Partial 8th block: record the leftover length and the last result block,
; then hash only the seven complete blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
; Three multipliers for blocks 5-7: 32 bytes from [304+rdx] plus
; [336+rdx] inserted as lane 2. Lane 3 of zmm1 stays zero because the ymm
; load cleared it, so the partial 8th block in lane 3 of zmm19 is ignored.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Same POLY2 reduction as on the full path; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_232:

; If bytes are left over (r13 != 0), XOR the shuffled partial block into
; the accumulator without multiplying it; the multiply is presumably
; completed by a later call once the block is full.
	or	r13,r13
	je	NEAR $L$_after_reduction_232
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_232:
	jmp	NEAR $L$_last_blocks_done_216
; ---------------------------------------------------------------------
; Tail handler: 9 blocks left (eight whole 16-byte blocks plus a 9th
; that may be shorter than 16 bytes). The 9th block is processed at xmm
; width in the third register set (xmm4 / xmm20).
; Register roles as used in this section (inferred from the code below;
; NOTE(review): confirm against the prologue, which is outside this view):
;   rcx        base of the round-key array; keys at offsets 0..192 are used
;   rdx        context block: [16+rdx] is written on the partial path and
;              [208..336+rdx] is read as GHASH multiplier data
;   r8         pointer to a QWORD that receives the leftover byte count
;   r9+r11     input address; [120+rbp] holds the output base (also +r11)
;   r13        byte count still to be processed
;   r15d       compared against 256-9 to detect counter wrap
;   zmm2       counter block(s); xmm14 in/out hash accumulator (presumably)
;   zmm29      byte-shuffle mask
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_9_216:
; k1 = byte mask for the third vector, looked up by (r13 - 128).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 247 = 256 - 9. At or above this the slow path is taken; presumably r15d
; tracks the low counter byte and adding 9 would carry out of it.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_233
; Fast path: add the increments held in zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_233

$L$_16_blocks_overflow_233:
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle the results back (only the low
; 128 bits of the third vector are kept).
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_233:




; From here the AES rounds on zmm0/zmm3/xmm4 are interleaved with
; carry-less multiplies of four 64-byte vectors at rsp+768..960 by the
; four vectors at rsp+rbx+0..192 (presumably buffered data and matching
; hash-key powers). The first data vector is XORed with zmm14 before use.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 0 of zmm4 is the 9th counter block; replicate it into every lane
; of zmm2 (vshufi64x2 with imm 0 copies lane 0 everywhere).
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products for vector 0: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; vpternlogq with 0x96 is a three-way XOR: fold vectors 0..2 together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: 128 full bytes, then the k1-masked remainder (zero-filled).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products for vector 3, then the final partial sums:
; zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: 11 vaesenc in total plus vaesenclast with the key at
; [192+rcx], i.e. a 12-round schedule.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
; XOR the cipher output with the input to form the result.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
; Keep the unmasked 9th result block in xmm11 (stored to [16+rdx] on the
; partial path below).
	vextracti32x4	xmm11,zmm4,0
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
; Zero the bytes beyond the message end, then byte-shuffle the result
; (the data just written is what gets hashed). xmm7 = shuffled 9th block.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	vextracti32x4	xmm7,zmm20,0
; r13 = bytes belonging to the 9th block.
	sub	r13,16 * (9 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_234





; Full 9th block: nothing is left over, hash all 9 blocks.
	sub	r13,16
	mov	QWORD[r8],0
; Multipliers for blocks 1-4 come from [208+rdx], for blocks 5-8 from
; [272+rdx], and for block 9 from the 16 bytes at [336+rdx].
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; xmm-width operations: the upper 384 bits of zmm4/zmm5/zmm0/zmm3 are
; written as zero, so the zmm-wide merges below remain valid.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

; Merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) sums, then
; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the reduced
; 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_234
$L$_small_initial_partial_block_234:








; Partial 9th block: record the leftover length and the last result block,
; then hash only the eight complete blocks (two full 64-byte vectors).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

; Merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Same POLY2 reduction as on the full path; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_234:

; If bytes are left over (r13 != 0), XOR the shuffled partial block into
; the accumulator without multiplying it; the multiply is presumably
; completed by a later call once the block is full.
	or	r13,r13
	je	NEAR $L$_after_reduction_234
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_234:
	jmp	NEAR $L$_last_blocks_done_216
; ---------------------------------------------------------------------
; Tail handler: 10 blocks left (nine whole 16-byte blocks plus a 10th
; that may be shorter than 16 bytes). Blocks 9-10 are processed at ymm
; width in the third register set (ymm4 / ymm20).
; Register roles as used in this section (inferred from the code below;
; NOTE(review): confirm against the prologue, which is outside this view):
;   rcx        base of the round-key array; keys at offsets 0..192 are used
;   rdx        context block: [16+rdx] is written on the partial path and
;              [192..336+rdx] is read as GHASH multiplier data
;   r8         pointer to a QWORD that receives the leftover byte count
;   r9+r11     input address; [120+rbp] holds the output base (also +r11)
;   r13        byte count still to be processed
;   r15d       compared against 256-10 to detect counter wrap
;   zmm2       counter block(s); xmm14 in/out hash accumulator (presumably)
;   zmm29      byte-shuffle mask
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_10_216:
; k1 = byte mask for the third vector, looked up by (r13 - 128).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 246 = 256 - 10. At or above this the slow path is taken; presumably r15d
; tracks the low counter byte and adding 10 would carry out of it.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_235
; Fast path: add the increments held in zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_235

$L$_16_blocks_overflow_235:
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle the results back (only the low
; 256 bits of the third vector are kept).
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_235:




; From here the AES rounds on zmm0/zmm3/ymm4 are interleaved with
; carry-less multiplies of four 64-byte vectors at rsp+768..960 by the
; four vectors at rsp+rbx+0..192 (presumably buffered data and matching
; hash-key powers). The first data vector is XORed with zmm14 before use.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 1 of zmm4 is the 10th counter block; replicate it into every lane
; of zmm2 (vshufi64x2 with imm 0 copies lane 0 everywhere).
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products for vector 0: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; vpternlogq with 0x96 is a three-way XOR: fold vectors 0..2 together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: 128 full bytes, then the k1-masked remainder (zero-filled).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products for vector 3, then the final partial sums:
; zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: 11 vaesenc in total plus vaesenclast with the key at
; [192+rcx], i.e. a 12-round schedule.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
; XOR the cipher output with the input to form the result.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
; Keep the unmasked 10th result block in xmm11 (stored to [16+rdx] on the
; partial path below).
	vextracti32x4	xmm11,zmm4,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
; Zero the bytes beyond the message end, then byte-shuffle the result
; (the data just written is what gets hashed). xmm7 = shuffled 10th block.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	vextracti32x4	xmm7,zmm20,1
; r13 = bytes belonging to the 10th block.
	sub	r13,16 * (10 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_236





; Full 10th block: nothing is left over, hash all 10 blocks.
	sub	r13,16
	mov	QWORD[r8],0
; Multipliers for blocks 1-4 come from [192+rdx], for blocks 5-8 from
; [256+rdx], and for blocks 9-10 from the 32 bytes at [320+rdx].
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; ymm-width operations: the upper 256 bits of zmm4/zmm5/zmm0/zmm3 are
; written as zero, so the zmm-wide merges below remain valid.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

; Merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) sums, then
; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the reduced
; 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_236
$L$_small_initial_partial_block_236:








; Partial 10th block: record the leftover length and the last result
; block, then hash only the nine complete blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Only block 9 (low 128 bits of zmm20) is multiplied here; the xmm-width
; writes zero the upper 384 bits of zmm4/zmm5/zmm0/zmm3.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Same POLY2 reduction as on the full path; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_236:

; If bytes are left over (r13 != 0), XOR the shuffled partial block into
; the accumulator without multiplying it; the multiply is presumably
; completed by a later call once the block is full.
	or	r13,r13
	je	NEAR $L$_after_reduction_236
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_236:
	jmp	NEAR $L$_last_blocks_done_216
; ---------------------------------------------------------------------
; Tail handler: 11 blocks left (ten whole 16-byte blocks plus an 11th
; that may be shorter than 16 bytes). Blocks 9-11 are processed in the
; third register set (zmm4 / zmm20).
; Register roles as used in this section (inferred from the code below;
; NOTE(review): confirm against the prologue, which is outside this view):
;   rcx        base of the round-key array; keys at offsets 0..192 are used
;   rdx        context block: [16+rdx] is written on the partial path and
;              [176..336+rdx] is read as GHASH multiplier data
;   r8         pointer to a QWORD that receives the leftover byte count
;   r9+r11     input address; [120+rbp] holds the output base (also +r11)
;   r13        byte count still to be processed
;   r15d       compared against 256-11 to detect counter wrap
;   zmm2       counter block(s); xmm14 in/out hash accumulator (presumably)
;   zmm29      byte-shuffle mask
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_11_216:
; k1 = byte mask for the third vector, looked up by (r13 - 128).
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; 245 = 256 - 11. At or above this the slow path is taken; presumably r15d
; tracks the low counter byte and adding 11 would carry out of it.
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_237
; Fast path: add the increments held in zmm28 / zmm27 directly.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_237

$L$_16_blocks_overflow_237:
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 and
; ddq_add_4444 constants, then shuffle all three results back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_237:




; From here the AES rounds on zmm0/zmm3/zmm4 are interleaved with
; carry-less multiplies of four 64-byte vectors at rsp+768..960 by the
; four vectors at rsp+rbx+0..192 (presumably buffered data and matching
; hash-key powers). The first data vector is XORed with zmm14 before use.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
; Lane 2 of zmm4 is the 11th counter block; replicate it into every lane
; of zmm2 (vshufi64x2 with imm 0 copies lane 0 everywhere).
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; Products for vector 0: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


; Products for vector 1.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


; Products for vector 2.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; vpternlogq with 0x96 is a three-way XOR: fold vectors 0..2 together.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the input: 128 full bytes, then the k1-masked remainder (zero-filled).
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


; Products for vector 3, then the final partial sums:
; zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
; Remaining rounds: 11 vaesenc in total plus vaesenclast with the key at
; [192+rcx], i.e. a 12-round schedule.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
; XOR the cipher output with the input to form the result.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
; Keep the unmasked 11th result block in xmm11 (stored to [16+rdx] on the
; partial path below).
	vextracti32x4	xmm11,zmm4,2
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
; Zero the bytes beyond the message end, then byte-shuffle the result
; (the data just written is what gets hashed). xmm7 = shuffled 11th block.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,2
; r13 = bytes belonging to the 11th block.
	sub	r13,16 * (11 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_238





; Full 11th block: nothing is left over, hash all 11 blocks.
	sub	r13,16
	mov	QWORD[r8],0
; Multipliers for blocks 1-4 come from [176+rdx], for blocks 5-8 from
; [240+rdx], and for blocks 9-11 from [304+rdx] (32 bytes) plus [336+rdx]
; inserted as lane 2.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Lane 3 of zmm1 stays zero because the ymm load cleared it, so that lane
; contributes nothing to the products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

; Merge with the partial sums carried in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the cross terms across the high (zmm0) and low (zmm3) sums, then
; XOR the four 128-bit lanes of each down to xmm0 / xmm3.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-step reduction of xmm0:xmm3 using the POLY2 constant; the reduced
; 128-bit value ends up in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_238
$L$_small_initial_partial_block_238:








; Partial 11th block: record the leftover length and the last result
; block, then hash only the ten complete blocks.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
; Only blocks 9-10 (low 256 bits of zmm20) are multiplied here; the
; ymm-width writes zero the upper 256 bits of zmm4/zmm5/zmm0/zmm3.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Same POLY2 reduction as on the full path; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_238:

; If bytes are left over (r13 != 0), XOR the shuffled partial block into
; the accumulator without multiplying it; the multiply is presumably
; completed by a later call once the block is full.
	or	r13,r13
	je	NEAR $L$_after_reduction_238
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_238:
	jmp	NEAR $L$_last_blocks_done_216
; ----------------------------------------------------------------------------
; Tail case: 12 counter blocks (zmm0, zmm3, zmm4; the third vector is
; byte-masked with k1).
;
; What the code below does, in order:
;   1. k1 <- byte64_len_to_mask_table[r13 - 128] (8-byte entries)
;   2. builds 12 counter blocks from zmm2
;   3. applies round keys [rcx+0] .. [rcx+192] (11 vaesenc + vaesenclast),
;      interleaved with carry-less multiplies of the data at [rsp+768..1023]
;      by the values at [rsp+rbx..]; partial sums land in zmm24/zmm25/zmm26
;   4. XORs with the input at [r9+r11] and stores to [r10+r11], r10 <- [rbp+120]
;   5. byte-shuffles the output with zmm29, folds it into the hash and leaves
;      the reduced 128-bit value in xmm14
; NOTE(review): register roles (r13 = bytes left, rdx = hash-key table,
; r8 = pointer to the partial-block length) are inferred from how this block
; uses them - confirm against the function prologue.
; ----------------------------------------------------------------------------
$L$_last_num_blocks_is_12_216:
	; k1 = table[r13 - 128]; presumably the byte mask for the third 64-byte vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; r15d >= 244 selects the slow counter path (presumably because the low
	; counter byte would wrap within 12 increments - TODO confirm).
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_239
	; Fast path: add zmm28 once, then zmm27 per further vector, no byte swap.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_239

$L$_16_blocks_overflow_239:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 /
	; ddq_add_4444, then shuffle every result back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_239:

; zmm30 = round key 0 broadcast to all lanes.
; zmm8  = xmm14 accumulator XORed into the data at [rsp+768].
; zmm2  = lane 3 of zmm4 (the 12th counter block) replicated to all four lanes.

	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0

; Round 0: XOR the counter blocks with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Multiply chunk 0 ([rsp+768] ^ accumulator) by [rsp+rbx]:
; zmm14 = high (0x11), zmm7 = low (0x00), zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Multiply chunk 1 ([rsp+832]) by [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Multiply chunk 2 ([rsp+896]) by [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; Three-way XOR (imm 0x96) of the partial products of chunks 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input; the third vector is masked by k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Multiply chunk 3 ([rsp+960]) by [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums kept for the tail hash below:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; Remaining rounds; zmm30/zmm31 alternate as the broadcast round key.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; Final round uses the key at [rcx+192].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; xmm11 = unmasked lane 3 of the last vector; only the partial-block
	; path below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm4,3
	; Destination base is reloaded from the frame slot at [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes outside k1 so they do not enter the hash.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output with zmm29 before hashing; xmm7 = shuffled 12th block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vextracti32x4	xmm7,zmm20,3
	; r13 = length left for the 12th block alone.
	sub	r13,16 * (12 - 1)

; Fewer than 16 bytes in the last block -> partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_240

; ---- Last block is complete: hash all 12 blocks ----
; The stored partial length at [r8] is cleared. Multipliers come from
; [rdx+160], [rdx+224] and [rdx+288] (presumably precomputed hash-key
; powers - confirm).

	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Add the sums carried over from the interleaved multiplies above.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25
	; Split the cross terms into high/low halves, then XOR the four
	; 128-bit lanes together (512 -> 256 -> 128 bits).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1 (uses the POLY2 constant).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_240
$L$_small_initial_partial_block_240:

; ---- Last block is partial ----
; [r8] receives the leftover byte count and [rdx+16] the unmasked last
; output block (xmm11). The multipliers are taken 16 bytes further into the
; table ([rdx+176], [rdx+240], [rdx+304]); the third multiplier vector has
; lane 3 left at zero, so the 12th block does not contribute to the products.
; It is XORed into xmm14 after the reduction instead.



	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; zmm1 lanes 0-1 <- [rdx+304..335], lane 2 <- [rdx+336..351], lane 3 = 0.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Fold the cross terms and XOR the lanes together, as in the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_240:
	; If bytes of a partial block remain (r13 != 0), XOR the shuffled last
	; block (xmm7) into the accumulator.
	or	r13,r13
	je	NEAR $L$_after_reduction_240
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_240:
	jmp	NEAR $L$_last_blocks_done_216
; ----------------------------------------------------------------------------
; Tail case: 13 counter blocks (zmm0, zmm3, zmm4 full; the 13th block lives
; in xmm5 and is byte-masked with k1).
;
; Same structure as the other tail cases visible in this file: mask lookup,
; counter generation, round keys [rcx+0] .. [rcx+192] interleaved with
; carry-less multiplies of the data at [rsp+768..1023], XOR with the input at
; [r9+r11], store to [r10+r11], then hash of the byte-shuffled output with the
; reduced result left in xmm14.
; NOTE(review): register roles (r13 = bytes left, rdx = hash-key table,
; r8 = pointer to the partial-block length) are inferred from how this block
; uses them - confirm against the function prologue.
; ----------------------------------------------------------------------------
$L$_last_num_blocks_is_13_216:
	; k1 = table[r13 - 192]; presumably the byte mask for the fourth vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 243 selects the slow counter path (presumably because the low
	; counter byte would wrap within 13 increments - TODO confirm).
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_241
	; Fast path: direct adds; only one 128-bit lane is needed for the 13th block.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_241

$L$_16_blocks_overflow_241:
	; Slow path: shuffle with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle
	; back. zmm5 is computed at full width but only xmm5 is shuffled back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
$L$_16_blocks_ok_241:

; zmm30 = round key 0 broadcast to all lanes.
; zmm8  = xmm14 accumulator XORed into the data at [rsp+768].
; zmm2  = lane 0 of zmm5 (the 13th counter block) replicated to all four lanes.

	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0

; Round 0: XOR the counter blocks with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Multiply chunk 0 ([rsp+768] ^ accumulator) by [rsp+rbx]:
; zmm14 = high (0x11), zmm7 = low (0x00), zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Multiply chunk 1 ([rsp+832]) by [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Multiply chunk 2 ([rsp+896]) by [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; Three-way XOR (imm 0x96) of the partial products of chunks 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input; the 13th block is masked by k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Multiply chunk 3 ([rsp+960]) by [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums kept for the tail hash below:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; Remaining rounds; zmm30/zmm31 alternate as the broadcast round key.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	; Final round uses the key at [rcx+192].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; xmm11 = unmasked 13th output block; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm5,0
	; Destination base is reloaded from the frame slot at [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes outside k1 so they do not enter the hash.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 before hashing; xmm7 = shuffled 13th block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	vextracti32x4	xmm7,zmm21,0
	; r13 = length left for the 13th block alone.
	sub	r13,16 * (13 - 1)

; Fewer than 16 bytes in the last block -> partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_242

; ---- Last block is complete: hash all 13 blocks ----
; The stored partial length at [r8] is cleared. Multipliers come from
; [rdx+144], [rdx+208], [rdx+272] and, for the 13th block, [rdx+336]
; (presumably precomputed hash-key powers - confirm).

	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; 13th block times the 128-bit multiplier at [rdx+336].
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Split the cross terms into high/low halves, then XOR the four
	; 128-bit lanes together (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1 (uses the POLY2 constant).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_242
$L$_small_initial_partial_block_242:

; ---- Last block is partial ----
; [r8] receives the leftover byte count and [rdx+16] the unmasked last
; output block (xmm11). Only the 12 complete blocks (zmm17/zmm19/zmm20) are
; multiplied, using [rdx+160], [rdx+224] and [rdx+288]; the 13th block is
; XORed into xmm14 after the reduction instead.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Add the sums carried over from the interleaved multiplies above.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25
	; Fold the cross terms and XOR the lanes together, as in the full path.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_242:
	; If bytes of a partial block remain (r13 != 0), XOR the shuffled last
	; block (xmm7) into the accumulator.
	or	r13,r13
	je	NEAR $L$_after_reduction_242
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_242:
	jmp	NEAR $L$_last_blocks_done_216
; ----------------------------------------------------------------------------
; Tail case: 14 counter blocks (zmm0, zmm3, zmm4 full; blocks 13-14 live in
; ymm5 and are byte-masked with k1).
;
; Same structure as the other tail cases visible in this file: mask lookup,
; counter generation, round keys [rcx+0] .. [rcx+192] interleaved with
; carry-less multiplies of the data at [rsp+768..1023], XOR with the input at
; [r9+r11], store to [r10+r11], then hash of the byte-shuffled output with the
; reduced result left in xmm14.
; NOTE(review): register roles (r13 = bytes left, rdx = hash-key table,
; r8 = pointer to the partial-block length) are inferred from how this block
; uses them - confirm against the function prologue.
; ----------------------------------------------------------------------------
$L$_last_num_blocks_is_14_216:
	; k1 = table[r13 - 192]; presumably the byte mask for the fourth vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 242 selects the slow counter path (presumably because the low
	; counter byte would wrap within 14 increments - TODO confirm).
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_243
	; Fast path: direct adds; two 128-bit lanes are needed for blocks 13-14.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_243

$L$_16_blocks_overflow_243:
	; Slow path: shuffle with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle
	; back. zmm5 is computed at full width but only ymm5 is shuffled back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
$L$_16_blocks_ok_243:

; zmm30 = round key 0 broadcast to all lanes.
; zmm8  = xmm14 accumulator XORed into the data at [rsp+768].
; zmm2  = lane 1 of zmm5 (the 14th counter block) replicated to all four lanes.

	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0

; Round 0: XOR the counter blocks with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Multiply chunk 0 ([rsp+768] ^ accumulator) by [rsp+rbx]:
; zmm14 = high (0x11), zmm7 = low (0x00), zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Multiply chunk 1 ([rsp+832]) by [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Multiply chunk 2 ([rsp+896]) by [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; Three-way XOR (imm 0x96) of the partial products of chunks 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input; blocks 13-14 are masked by k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Multiply chunk 3 ([rsp+960]) by [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums kept for the tail hash below:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; Remaining rounds; zmm30/zmm31 alternate as the broadcast round key.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	; Final round uses the key at [rcx+192].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	; xmm11 = unmasked 14th output block; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm5,1
	; Destination base is reloaded from the frame slot at [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	; Zero the bytes outside k1 so they do not enter the hash.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 before hashing; xmm7 = shuffled 14th block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	vextracti32x4	xmm7,zmm21,1
	; r13 = length left for the 14th block alone.
	sub	r13,16 * (14 - 1)

; Fewer than 16 bytes in the last block -> partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_244

; ---- Last block is complete: hash all 14 blocks ----
; The stored partial length at [r8] is cleared. Multipliers come from
; [rdx+128], [rdx+192], [rdx+256] and, for blocks 13-14, [rdx+320]
; (presumably precomputed hash-key powers - confirm).

	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 times the 256-bit multiplier pair at [rdx+320].
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Split the cross terms into high/low halves, then XOR the four
	; 128-bit lanes together (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1 (uses the POLY2 constant).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_244
$L$_small_initial_partial_block_244:

; ---- Last block is partial ----
; [r8] receives the leftover byte count and [rdx+16] the unmasked last
; output block (xmm11). Only 13 blocks are multiplied (zmm17/zmm19/zmm20 plus
; the low lane xmm21), using [rdx+144], [rdx+208], [rdx+272] and [rdx+336];
; the 14th block is XORed into xmm14 after the reduction instead.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; 13th block (low lane of ymm21) times the multiplier at [rdx+336].
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Fold the cross terms and XOR the lanes together, as in the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_244:
	; If bytes of a partial block remain (r13 != 0), XOR the shuffled last
	; block (xmm7) into the accumulator.
	or	r13,r13
	je	NEAR $L$_after_reduction_244
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_244:
	jmp	NEAR $L$_last_blocks_done_216
; ----------------------------------------------------------------------------
; Tail case: 15 counter blocks (zmm0, zmm3, zmm4 full; blocks 13-15 live in
; zmm5 and are byte-masked with k1).
;
; Same structure as the other tail cases visible in this file: mask lookup,
; counter generation, round keys [rcx+0] .. [rcx+192] interleaved with
; carry-less multiplies of the data at [rsp+768..1023], XOR with the input at
; [r9+r11], store to [r10+r11], then hash of the byte-shuffled output with the
; reduced result left in xmm14.
; NOTE(review): register roles (r13 = bytes left, rdx = hash-key table,
; r8 = pointer to the partial-block length) are inferred from how this block
; uses them - confirm against the function prologue.
; ----------------------------------------------------------------------------
$L$_last_num_blocks_is_15_216:
	; k1 = table[r13 - 192]; presumably the byte mask for the fourth vector.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 241 selects the slow counter path (presumably because the low
	; counter byte would wrap within 15 increments - TODO confirm).
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_245
	; Fast path: add zmm28 once, then zmm27 per further vector, no byte swap.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_245

$L$_16_blocks_overflow_245:
	; Slow path: shuffle the counter with zmm29, add ddq_add_1234 /
	; ddq_add_4444, then shuffle every result back with zmm29.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_245:

; zmm30 = round key 0 broadcast to all lanes.
; zmm8  = xmm14 accumulator XORed into the data at [rsp+768].
; zmm2  = lane 2 of zmm5 (the 15th counter block) replicated to all four lanes.

	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0

; Round 0: XOR the counter blocks with the key at [rcx].
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]

; Multiply chunk 0 ([rsp+768] ^ accumulator) by [rsp+rbx]:
; zmm14 = high (0x11), zmm7 = low (0x00), zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]

; Multiply chunk 1 ([rsp+832]) by [rsp+rbx+64].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]

; Multiply chunk 2 ([rsp+896]) by [rsp+rbx+128].
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]

; Three-way XOR (imm 0x96) of the partial products of chunks 0..2.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input; the fourth vector is masked by k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]

; Multiply chunk 3 ([rsp+960]) by [rsp+rbx+192].
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Unreduced sums kept for the tail hash below:
	; zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; Remaining rounds; zmm30/zmm31 alternate as the broadcast round key.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Final round uses the key at [rcx+192].
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Output = keystream XOR input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = unmasked 15th output block; only the partial-block path
	; below stores it (to [rdx+16]).
	vextracti32x4	xmm11,zmm5,2
	; Destination base is reloaded from the frame slot at [rbp+120].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes outside k1 so they do not enter the hash.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 before hashing; xmm7 = shuffled 15th block.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	vextracti32x4	xmm7,zmm21,2
	; r13 = length left for the 15th block alone.
	sub	r13,16 * (15 - 1)

; Fewer than 16 bytes in the last block -> partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_246

; ---- Last block is complete: hash all 15 blocks ----
; The stored partial length at [r8] is cleared. Multipliers come from
; [rdx+112], [rdx+176], [rdx+240] and, for blocks 13-15, [rdx+304..351]
; (presumably precomputed hash-key powers - confirm).

	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; zmm1 lanes 0-1 <- [rdx+304..335], lane 2 <- [rdx+336..351], lane 3 = 0.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Split the cross terms into high/low halves, then XOR the four
	; 128-bit lanes together (512 -> 256 -> 128 bits).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1 (uses the POLY2 constant).
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_246
$L$_small_initial_partial_block_246:

; ---- Last block is partial ----
; [r8] receives the leftover byte count and [rdx+16] the unmasked last
; output block (xmm11). Only 14 blocks are multiplied (zmm17/zmm19/zmm20 plus
; the low half ymm21), using [rdx+128], [rdx+192], [rdx+256] and [rdx+320];
; the 15th block is XORed into xmm14 after the reduction instead.




	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Blocks 13-14 (low half of zmm21) times the multiplier pair at [rdx+320].
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00
	; Combine with the carried-over sums in zmm24/zmm25/zmm26.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96
	; Fold the cross terms and XOR the lanes together, as in the full path.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]

; Reduction, phase 1.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4

; Reduction, phase 2; the reduced value ends up in xmm14.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_246:
	; If bytes of a partial block remain (r13 != 0), XOR the shuffled last
	; block (xmm7) into the accumulator.
	or	r13,r13
	je	NEAR $L$_after_reduction_246
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_246:
	jmp	NEAR $L$_last_blocks_done_216
$L$_last_num_blocks_is_16_216:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_247
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_247

$L$_16_blocks_overflow_247:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
$L$_16_blocks_ok_247:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	vextracti32x4	xmm11,zmm5,3
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	vextracti32x4	xmm7,zmm21,3
	sub	r13,16 * (16 - 1)
; Final step of the 16-block tail case.  On entry zmm17/zmm19/zmm20/zmm21 hold
; the sixteen output blocks after the zmm29 byte shuffle, xmm11 the last output
; block before masking, xmm7 the last shuffled block, and zmm24/zmm25/zmm26 the
; high/low/middle GHASH partial sums built up earlier.
; NOTE(review): [r8] presumably is the caller's partial-block length and
; [rdx+16] a saved copy of the last output block - confirm against the context
; layout.  The table at rdx+112..rdx+351 is presumably the hash-key powers.
$L$_small_initial_partial_block_248:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
; Blocks 0-3: four carry-less products each (0x11 high*high, 0x00 low*low,
; 0x01/0x10 cross terms).
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
; Blocks 4-7.
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
; Blocks 8-11; vpternlogq with imm 0x96 is a three-way XOR accumulate.
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
; Blocks 12-14 only: the 256-bit load zeroes the upper half of zmm1 and the
; insert fills lane 2, so lane 3 stays zero and block 15 contributes nothing
; to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

; Merge with the earlier partial sums: zmm0 = high, zmm3 = low, zmm4 = middle.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

; Split the middle term across the high and low halves, then XOR-fold the four
; 128-bit lanes of each half down to xmm0 (high) and xmm3 (low).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; The last block (xmm7) is XORed into the hash without being multiplied here.
$L$_small_initial_compute_done_248:
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_248:
	jmp	NEAR $L$_last_blocks_done_216
; Case "0 blocks left" (per the label): hash the 256 bytes held at
; [rsp+768 .. rsp+1023] against the 256 bytes at [rsp+rbx .. rsp+rbx+255],
; with the current hash xmm14/zmm14 XORed into the first 64 bytes, and reduce
; the result into xmm14.  Falls through to the label that follows.
; NOTE(review): the stack area at 768 presumably holds byte-reflected
; ciphertext saved by an earlier pass and rsp+rbx the matching hash-key
; powers - confirm against the code that fills them.
$L$_last_num_blocks_is_0_216:
	vmovdqa64	zmm13,ZMMWORD[768+rsp]
	vpxorq	zmm13,zmm13,zmm14
	vmovdqu64	zmm12,ZMMWORD[rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[832+rsp]
	vmovdqu64	zmm12,ZMMWORD[64+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
; Start the accumulators: zmm24 = high, zmm25 = low, zmm26 = middle terms.
	vpxorq	zmm26,zmm4,zmm10
	vpxorq	zmm24,zmm0,zmm6
	vpxorq	zmm25,zmm3,zmm7
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[896+rsp]
	vmovdqu64	zmm12,ZMMWORD[128+rbx*1+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[960+rsp]
	vmovdqu64	zmm12,ZMMWORD[192+rbx*1+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

; Three-way XOR (imm 0x96) of the second pair of products into the accumulators.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

; Split the middle term into the high/low halves and fold 512 -> 128 bits.
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
; Two-phase reduction of xmm24:xmm25 with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm4,XMMWORD[POLY2]


	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Common exit of the "_216" tail cases: apply the zmm29 byte shuffle to the
; counter block in xmm2 and continue at the shared GHASH-done label.
; NOTE(review): zmm29 is presumably the byte-swap mask, so this restores the
; counter's byte order - confirm where zmm29 is loaded.
$L$_last_blocks_done_216:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_172
; Prepare sixteen counter blocks in zmm0/zmm3/zmm4/zmm5 from the base counter
; in zmm2.  r15b is compared against 240 because 16 is added to it further
; down; at or above 240 that addition would wrap the byte, so the slower path
; is taken.
; NOTE(review): r15b presumably mirrors the low byte of the counter, zmm28 and
; zmm27 presumably hold the per-lane increments - confirm at their setup.
$L$_encrypt_16_blocks_172:
	cmp	r15b,240
	jae	NEAR $L$_16_blocks_overflow_249
; Fast path: dword adds applied directly to the counter as held in zmm2.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_249
; Slow path: shuffle the counter with zmm29, add the ddq_add_1234 /
; ddq_add_4444 constants, then shuffle every result back.
$L$_16_blocks_overflow_249:
	vpshufb	zmm2,zmm2,zmm29
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; Encrypt the sixteen counter blocks in zmm0/zmm3/zmm4/zmm5 while computing
; GHASH partial products for the 256 bytes at [rsp+768 .. rsp+1023].
; AES: XOR with the round key at [rcx], eleven vaesenc rounds with the keys at
; rcx+16 .. rcx+176 and vaesenclast with the key at rcx+192 (the same key
; offsets as the 192-bit path of the init routine at the top of the file).
; Round keys alternate between zmm30 and zmm31 so the next key is loaded while
; the current one is in use.
; GHASH: each 64-byte group is multiplied by the matching 64 bytes at
; [rsp+0 .. rsp+255]; the current hash zmm14 is XORed into the first group.
; Results are left as zmm24 (high), zmm25 (low) and zmm26 (middle) sums.
; The 256 input bytes at r9+r11 are loaded into zmm17/zmm19/zmm20/zmm21 for
; the XOR that follows this block.
$L$_16_blocks_ok_249:
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rsp]




; zmm2 = lane 3 of zmm5 (the last counter block) replicated to every lane.
	vshufi64x2	zmm2,zmm5,zmm5,255
	add	r15b,16


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]







	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]



	vpclmulqdq	zmm6,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]



	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00


	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]



; Three-way XOR (imm 0x96) of the first three groups' products.
	vpternlogq	zmm6,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96



	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]



	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21,ZMMWORD[192+r11*1+r9]



	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]



	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00


	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Final sums for this pass: zmm26 = middle, zmm24 = high, zmm25 = low.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm26,zmm10,zmm15
	vpxorq	zmm24,zmm6,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]

	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30



	; XOR the keystream (zmm0/zmm3/zmm4/zmm5) with the 256 input bytes loaded
	; earlier into zmm17/zmm19/zmm20/zmm21.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21



	; Store 256 result bytes at offset r11 from the pointer kept at [rbp+120].
	; NOTE(review): [rbp+120] is presumably the spilled output-buffer argument.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10],zmm5
	; Apply the zmm29 byte shuffle to the results and park them at
	; [rsp+1280 .. rsp+1535]; the tail cases below read them back for GHASH.
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
	vmovdqa64	ZMMWORD[1280+rsp],zmm0
	vmovdqa64	ZMMWORD[1344+rsp],zmm3
	vmovdqa64	ZMMWORD[1408+rsp],zmm4
	vmovdqa64	ZMMWORD[1472+rsp],zmm5
	; Add the GHASH partial products of the 256 bytes at [rsp+1024 .. rsp+1279]
	; times the 256 bytes at [rsp+256 .. rsp+511] into the running sums
	; zmm24 (high), zmm25 (low) and zmm26 (middle).  No reduction happens here.
	; NOTE(review): presumably 16 earlier byte-reflected blocks and their
	; hash-key powers - confirm against the code that fills the stack frame.
	vmovdqa64	zmm13,ZMMWORD[1024+rsp]
	vmovdqu64	zmm12,ZMMWORD[256+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1088+rsp]
	vmovdqu64	zmm12,ZMMWORD[320+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; vpternlogq imm 0x96 = three-way XOR.
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	vmovdqa64	zmm13,ZMMWORD[1152+rsp]
	vmovdqu64	zmm12,ZMMWORD[384+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1216+rsp]
	vmovdqu64	zmm12,ZMMWORD[448+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Account for the 256 bytes just processed (r13 -= 256, r11 += 256), then
	; compute r10d = (r13d + 15) >> 4, i.e. the number of 16-byte blocks still
	; to do with a trailing partial block counted as one, and branch to the
	; matching $L$_last_num_blocks_is_N_250 handler through a comparison tree.
	sub	r13,256
	add	r11,256
	mov	r10d,r13d
	add	r10d,15
	shr	r10d,4
	; ZF comes from the shift result: nothing left.
	je	NEAR $L$_last_num_blocks_is_0_250

	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_250
	jb	NEAR $L$_last_num_blocks_is_7_1_250


	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_250
	jb	NEAR $L$_last_num_blocks_is_11_9_250


	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_250
	ja	NEAR $L$_last_num_blocks_is_16_250
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_250
	jmp	NEAR $L$_last_num_blocks_is_13_250

$L$_last_num_blocks_is_11_9_250:

	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_250
	ja	NEAR $L$_last_num_blocks_is_11_250
	jmp	NEAR $L$_last_num_blocks_is_9_250

$L$_last_num_blocks_is_7_1_250:
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_250
	jb	NEAR $L$_last_num_blocks_is_3_1_250

	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_250
	je	NEAR $L$_last_num_blocks_is_6_250
	jmp	NEAR $L$_last_num_blocks_is_5_250

$L$_last_num_blocks_is_3_1_250:

	; r10d is 1, 2 or 3 here; the value 1 falls through to the next label.
	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_250
	je	NEAR $L$_last_num_blocks_is_2_250
; One block left: load k1 with the 64-bit entry number r13 of
; byte64_len_to_mask_table (8 bytes per entry) and build one counter block in
; xmm0 from the base counter in xmm2/zmm2.
; NOTE(review): the table presumably yields a mask with the low r13 bits set,
; and r15d presumably tracks the counter's low byte so that values >= 255 need
; the carry-safe path - confirm at their definitions.
$L$_last_num_blocks_is_1_250:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_251
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_251

; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle the used lane back.
$L$_16_blocks_overflow_251:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
; Encrypt the single counter block in xmm0 (XOR with the key at [rcx], eleven
; vaesenc rounds with the keys at rcx+16 .. rcx+176, vaesenclast with the key
; at rcx+192) interleaved with the GHASH of the 256 bytes stashed at
; [rsp+1280 .. rsp+1535] times the 256 bytes at [rsp+512 .. rsp+767].
; The products are merged with the pending sums zmm24/zmm25/zmm26 and fully
; reduced, leaving the updated hash in xmm14.
; Outputs for the code that follows: zmm17 = byte-shuffled result block with
; bytes outside k1 zeroed, xmm7 = the same block, xmm11 = the result block
; before that zeroing, zmm2 = the counter block just used, in every lane.
$L$_16_blocks_ok_251:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the remaining input bytes at r9+r11.
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Merge with pending sums: zmm14 = high, zmm7 = low, zmm10 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	xmm0,xmm0,xmm31
; XOR-fold the four lanes of each half down to xmm14 (high) and xmm7 (low).
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Two-phase reduction with the POLY2 constant (in xmm16).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	xmm0,xmm0,xmm30
	vpxorq	xmm0,xmm0,xmm17
	vextracti32x4	xmm11,zmm0,0
; Masked store at offset r11 from the pointer kept at [rbp+120]; then zero the
; bytes outside k1 before the result is byte-shuffled for hashing.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	vextracti32x4	xmm7,zmm17,0


	; If fewer than 16 bytes remain, the block is partial and is handled at the
	; label below.  Otherwise: consume 16 bytes, write 0 to [r8], XOR the hash
	; xmm14 into the block and multiply by the 16-byte value at rdx+336.
	; NOTE(review): [r8] is presumably the partial-block length and rdx+336
	; presumably the first hash-key power - confirm against the context layout.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_252





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00

	; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold
	; the lanes down to 128 bits.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_252
; Partial last block (fewer than 16 bytes): store the byte count r13 to [r8]
; and the unmasked result block xmm11 to [rdx+16], then XOR the byte-shuffled
; block xmm7 into the hash xmm14 without multiplying.
; NOTE(review): the multiplication is presumably deferred until the block is
; completed or the tag is finalised - confirm in the callers.
$L$_small_initial_partial_block_252:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11











	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_252
; Both paths of the one-block case meet here and leave for the common exit.
$L$_small_initial_compute_done_252:
$L$_after_reduction_252:
	jmp	NEAR $L$_last_blocks_done_250
; Two blocks left: load k1 with the 64-bit entry number r13 of
; byte64_len_to_mask_table (8 bytes per entry) and build two counter blocks in
; ymm0 from the base counter in zmm2.
; NOTE(review): the table presumably yields a mask with the low r13 bits set,
; and r15d presumably tracks the counter's low byte so that values >= 254 need
; the carry-safe path - confirm at their definitions.
$L$_last_num_blocks_is_2_250:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_253
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_253

; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle the used lanes back.
$L$_16_blocks_overflow_253:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
; Encrypt the two counter blocks in ymm0 (XOR with the key at [rcx], eleven
; vaesenc rounds with the keys at rcx+16 .. rcx+176, vaesenclast with the key
; at rcx+192) interleaved with the GHASH of the 256 bytes stashed at
; [rsp+1280 .. rsp+1535] times the 256 bytes at [rsp+512 .. rsp+767].
; The products are merged with the pending sums zmm24/zmm25/zmm26 and fully
; reduced, leaving the updated hash in xmm14.
; Outputs for the code that follows: zmm17 = byte-shuffled result blocks with
; bytes outside k1 zeroed, xmm7 = the last of them, xmm11 = the last result
; block before that zeroing, zmm2 = the last counter block in every lane,
; r13 = bytes belonging to the last block.
$L$_16_blocks_ok_253:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the remaining input bytes at r9+r11.
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Merge with pending sums: zmm14 = high, zmm7 = low, zmm10 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	ymm0,ymm0,ymm31
; XOR-fold the four lanes of each half down to xmm14 (high) and xmm7 (low).
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Two-phase reduction with the POLY2 constant (in xmm16).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	ymm0,ymm0,ymm30
	vpxorq	ymm0,ymm0,ymm17
	vextracti32x4	xmm11,zmm0,1
; Masked store at offset r11 from the pointer kept at [rbp+120]; then zero the
; bytes outside k1 before the result is byte-shuffled for hashing.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	vextracti32x4	xmm7,zmm17,1
; Drop the one leading whole block from the byte count.
	sub	r13,16 * (2 - 1)


	; If fewer than 16 bytes remain for the last block, it is partial and is
	; handled at the label below.  Otherwise: consume 16 bytes, write 0 to
	; [r8], XOR the hash xmm14 into the first block and multiply both blocks
	; by the two 16-byte values at rdx+320.
	; NOTE(review): [r8] is presumably the partial-block length and rdx+320
	; presumably the two lowest hash-key powers - confirm against the context
	; layout.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_254





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00

	; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold
	; the lanes down to 128 bits.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_254
; Partial last block: store the byte count r13 to [r8] and the unmasked last
; result block xmm11 to [rdx+16].  Only the first (whole) block is multiplied
; here: the hash xmm14 is XORed into it and it is multiplied by the 16-byte
; value at rdx+336, then reduced into xmm14.
; NOTE(review): rdx+336 is presumably the first hash-key power - confirm.
$L$_small_initial_partial_block_254:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00

; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point of the full and partial paths.  When r13 is non-zero (a partial
; block is pending) its byte-shuffled value xmm7 is XORed into the hash
; without being multiplied.
$L$_small_initial_compute_done_254:

	or	r13,r13
	je	NEAR $L$_after_reduction_254
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_254:
	jmp	NEAR $L$_last_blocks_done_250
; Three blocks left: load k1 with the 64-bit entry number r13 of
; byte64_len_to_mask_table (8 bytes per entry) and build the counter blocks in
; zmm0 from the base counter in zmm2.
; NOTE(review): the table presumably yields a mask with the low r13 bits set,
; and r15d presumably tracks the counter's low byte so that values >= 253 need
; the carry-safe path - confirm at their definitions.
$L$_last_num_blocks_is_3_250:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_255
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_255

; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back.
$L$_16_blocks_overflow_255:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
; Encrypt the counter blocks in zmm0 (XOR with the key at [rcx], eleven
; vaesenc rounds with the keys at rcx+16 .. rcx+176, vaesenclast with the key
; at rcx+192) interleaved with the GHASH of the 256 bytes stashed at
; [rsp+1280 .. rsp+1535] times the 256 bytes at [rsp+512 .. rsp+767].
; The products are merged with the pending sums zmm24/zmm25/zmm26 and fully
; reduced, leaving the updated hash in xmm14.
; Outputs for the code that follows: zmm17 = byte-shuffled result blocks with
; bytes outside k1 zeroed, xmm7 = lane 2 of it (the third block), xmm11 = the
; third result block before that zeroing, zmm2 = the third counter block in
; every lane, r13 = bytes belonging to the last block.
$L$_16_blocks_ok_255:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the remaining input bytes at r9+r11.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Merge with pending sums: zmm14 = high, zmm7 = low, zmm10 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
; XOR-fold the four lanes of each half down to xmm14 (high) and xmm7 (low).
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Two-phase reduction with the POLY2 constant (in xmm16).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,2
; Masked store at offset r11 from the pointer kept at [rbp+120]; then zero the
; bytes outside k1 before the result is byte-shuffled for hashing.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,2
; Drop the two leading whole blocks from the byte count.
	sub	r13,16 * (3 - 1)


	; If fewer than 16 bytes remain for the last block, it is partial and is
	; handled at the label below.  Otherwise: consume 16 bytes, write 0 to
	; [r8], XOR the hash xmm14 into the first block and multiply the three
	; blocks by the three 16-byte values at rdx+304, rdx+320 and rdx+336 (the
	; 256-bit load zeroes the upper half of zmm1, the insert fills lane 2).
	; NOTE(review): [r8] is presumably the partial-block length and the values
	; at rdx+304.. presumably hash-key powers - confirm against the context
	; layout.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_256





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00

	; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold
	; the lanes down to 128 bits.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_256
; Partial last block: store the byte count r13 to [r8] and the unmasked last
; result block xmm11 to [rdx+16].  Only the two whole blocks are multiplied
; here: the hash xmm14 is XORed into the first and both are multiplied by the
; two 16-byte values at rdx+320, then reduced into xmm14.
; NOTE(review): rdx+320 presumably holds the two lowest hash-key powers -
; confirm.
$L$_small_initial_partial_block_256:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00

; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point of the full and partial paths.  When r13 is non-zero (a partial
; block is pending) its byte-shuffled value xmm7 is XORed into the hash
; without being multiplied.
$L$_small_initial_compute_done_256:

	or	r13,r13
	je	NEAR $L$_after_reduction_256
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_256:
	jmp	NEAR $L$_last_blocks_done_250
; Four blocks left: load k1 with the 64-bit entry number r13 of
; byte64_len_to_mask_table (8 bytes per entry) and build four counter blocks
; in zmm0 from the base counter in zmm2.
; NOTE(review): the table presumably yields a mask with the low r13 bits set,
; and r15d presumably tracks the counter's low byte so that values >= 252 need
; the carry-safe path - confirm at their definitions.
$L$_last_num_blocks_is_4_250:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_257
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_257

; Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back.
$L$_16_blocks_overflow_257:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
; Encrypt the four counter blocks in zmm0 (XOR with the key at [rcx], eleven
; vaesenc rounds with the keys at rcx+16 .. rcx+176, vaesenclast with the key
; at rcx+192) interleaved with the GHASH of the 256 bytes stashed at
; [rsp+1280 .. rsp+1535] times the 256 bytes at [rsp+512 .. rsp+767].
; The products are merged with the pending sums zmm24/zmm25/zmm26 and fully
; reduced, leaving the updated hash in xmm14.
; Outputs for the code that follows: zmm17 = byte-shuffled result blocks with
; bytes outside k1 zeroed, xmm7 = lane 3 of it (the fourth block), xmm11 = the
; fourth result block before that zeroing, zmm2 = the fourth counter block in
; every lane, r13 = bytes belonging to the last block.
$L$_16_blocks_ok_257:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Masked, zeroing load of the remaining input bytes at r9+r11.
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Merge with pending sums: zmm14 = high, zmm7 = low, zmm10 = middle.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
; XOR-fold the four lanes of each half down to xmm14 (high) and xmm7 (low).
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Two-phase reduction with the POLY2 constant (in xmm16).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vextracti32x4	xmm11,zmm0,3
; Masked store at offset r11 from the pointer kept at [rbp+120]; then zero the
; bytes outside k1 before the result is byte-shuffled for hashing.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	vextracti32x4	xmm7,zmm17,3
; Drop the three leading whole blocks from the byte count.
	sub	r13,16 * (4 - 1)


	; If fewer than 16 bytes remain for the last block, it is partial and is
	; handled at the label below.  Otherwise: consume 16 bytes, write 0 to
	; [r8], XOR the hash xmm14 into the first block and multiply the four
	; blocks by the four 16-byte values at rdx+288 .. rdx+351.
	; NOTE(review): [r8] is presumably the partial-block length and the values
	; at rdx+288.. presumably hash-key powers - confirm against the context
	; layout.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_258





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; zmm8 = high, zmm22 = low, zmm30 = middle; split the middle term into
	; zmm0 (high) / zmm3 (low) and fold the lanes down to 128 bits.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_258
; Partial last block: store the byte count r13 to [r8] and the unmasked last
; result block xmm11 to [rdx+16].  Only the three whole blocks are multiplied
; here: the hash xmm14 is XORed into the first, and lanes 0-2 are multiplied
; by the 16-byte values at rdx+304, rdx+320 and rdx+336.  Lane 3 of zmm1 stays
; zero (the 256-bit load clears the upper half, the insert fills lane 2 only),
; so the partial block contributes nothing to these products.
; NOTE(review): the values at rdx+304.. are presumably hash-key powers -
; confirm.
$L$_small_initial_partial_block_258:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00

; zmm0 = high, zmm3 = low, zmm4 = middle; split the middle term and fold.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with the POLY2 constant; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Join point of the full and partial paths.  When r13 is non-zero (a partial
; block is pending) its byte-shuffled value xmm7 is XORed into the hash
; without being multiplied.
$L$_small_initial_compute_done_258:

	or	r13,r13
	je	NEAR $L$_after_reduction_258
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_258:
	jmp	NEAR $L$_last_blocks_done_250
; ----------------------------------------------------------------------
; Tail case: five 16-byte blocks are left. Four go through zmm0 and the
; fifth (possibly partial) through xmm3.
; Values set up outside this block, as they are used here (roles are
; inferred from usage - confirm against the code that jumps here):
;   rcx       base of the AES round keys; 13 keys at [rcx+0 .. rcx+192]
;   r9+r11    source bytes; destination is [QWORD[rbp+120] + r11]
;   r13       byte count still to process
;   r15d      compared with 251 to choose the counter-increment path
;   zmm2      counter block(s); zmm27 / zmm28 are the increments added
;   zmm29     vpshufb control used for every byte shuffle below
;   zmm24-26  accumulators folded into the hash sums
;   rdx       context: [rdx+16] save slot, [rdx+272..336] hash multipliers
;   r8        receives the byte length of a trailing partial block
; Result: xmm14 holds the reduced hash value on exit.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_5_250:
; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries): the byte mask
; applied to the data that follows the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Fast path when r15d is below 251: add the increments directly.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_259
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_259

; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444,
; then shuffle the results back (presumably handles a wrap of the low
; counter byte - confirm).
$L$_16_blocks_overflow_259:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_259:




; Round-key 0 is broadcast to all lanes; the first pair of operands for
; the carry-less multiplies is fetched from the stack frame.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
; zmm2 <- lane 0 of zmm3 (the last counter block) replicated to all lanes.
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR the counter blocks with the first round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; From here AES rounds alternate with carry-less multiplies of the data at
; [rsp+512..767] by the values at [rsp+1280..1535] (presumably earlier
; blocks times hash-key powers - confirm). Four partial products each.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XOR (imm 0x96) merges the products of the first three groups.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the source: 64 bytes unmasked, then the tail under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold in the fourth group and the zmm24 / zmm25 / zmm26 accumulators.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
; Split the middle sum: its upper half goes to zmm14, lower half to zmm7.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
; Horizontal XOR of the four 128-bit lanes of zmm14 and of zmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Reduction with the POLY2 constant, first phase.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
; Reduction, second phase; xmm14 becomes the reduced hash of the old data.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
; Last AES round with the key at [rcx+192], then XOR with the source.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
; xmm11 keeps the last result lane as it is before masking.
	vextracti32x4	xmm11,zmm3,0
; Store: 64 bytes unmasked, the tail under k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
; Zero the bytes outside k1, then byte-shuffle the output for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
; xmm7 <- shuffled last block (used below only when it is partial).
	vextracti32x4	xmm7,zmm19,0
; r13 <- byte count belonging to the fifth block.
	sub	r13,16 * (5 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_260





; Fifth block is complete: store 0 at [r8] and hash all five blocks,
; multiplying by the entries at [rdx+272] (4 blocks) and [rdx+336] (1).
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

; Combine the middle terms, split them across high (zmm0) and low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; Horizontal XOR down to 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_260
$L$_small_initial_partial_block_260:








; Fifth block is partial: store its byte length at [r8], save xmm11 at
; [rdx+16], and hash only the four complete blocks (entries at [rdx+288]).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_260:

; r13 is non-zero only on the partial path: XOR the shuffled partial
; block (xmm7) into the hash value so it is carried forward unmultiplied.
	or	r13,r13
	je	NEAR $L$_after_reduction_260
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_260:
	jmp	NEAR $L$_last_blocks_done_250
; ----------------------------------------------------------------------
; Tail case: six 16-byte blocks are left. Four go through zmm0 and two
; (the second possibly partial) through ymm3.
; Values set up outside this block, as they are used here (roles are
; inferred from usage - confirm against the code that jumps here):
;   rcx       base of the AES round keys; 13 keys at [rcx+0 .. rcx+192]
;   r9+r11    source bytes; destination is [QWORD[rbp+120] + r11]
;   r13       byte count still to process
;   r15d      compared with 250 to choose the counter-increment path
;   zmm2      counter block(s); zmm27 / zmm28 are the increments added
;   zmm29     vpshufb control used for every byte shuffle below
;   zmm24-26  accumulators folded into the hash sums
;   rdx       context: [rdx+16] save slot, [rdx+256..336] hash multipliers
;   r8        receives the byte length of a trailing partial block
; Result: xmm14 holds the reduced hash value on exit.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_6_250:
; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries): the byte mask
; applied to the data that follows the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Fast path when r15d is below 250: add the increments directly.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_261
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_261

; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444,
; then shuffle the results back (presumably handles a wrap of the low
; counter byte - confirm).
$L$_16_blocks_overflow_261:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_261:




; Round-key 0 is broadcast to all lanes; the first pair of operands for
; the carry-less multiplies is fetched from the stack frame.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
; zmm2 <- lane 1 of zmm3 (the last counter block) replicated to all lanes.
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR the counter blocks with the first round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; From here AES rounds alternate with carry-less multiplies of the data at
; [rsp+512..767] by the values at [rsp+1280..1535] (presumably earlier
; blocks times hash-key powers - confirm). Four partial products each.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XOR (imm 0x96) merges the products of the first three groups.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the source: 64 bytes unmasked, then the tail under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold in the fourth group and the zmm24 / zmm25 / zmm26 accumulators.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
; Split the middle sum: its upper half goes to zmm14, lower half to zmm7.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
; Horizontal XOR of the four 128-bit lanes of zmm14 and of zmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Reduction with the POLY2 constant, first phase.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
; Reduction, second phase; xmm14 becomes the reduced hash of the old data.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
; Last AES round with the key at [rcx+192], then XOR with the source.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
; xmm11 keeps the last result lane as it is before masking.
	vextracti32x4	xmm11,zmm3,1
; Store: 64 bytes unmasked, the tail under k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
; Zero the bytes outside k1, then byte-shuffle the output for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
; xmm7 <- shuffled last block (used below only when it is partial).
	vextracti32x4	xmm7,zmm19,1
; r13 <- byte count belonging to the sixth block.
	sub	r13,16 * (6 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_262





; Sixth block is complete: store 0 at [r8] and hash all six blocks,
; multiplying by the entries at [rdx+256] (4 blocks) and [rdx+320] (2).
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

; Combine the middle terms, split them across high (zmm0) and low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; Horizontal XOR down to 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_262
$L$_small_initial_partial_block_262:








; Sixth block is partial: store its byte length at [r8], save xmm11 at
; [rdx+16], and hash only the five complete blocks (entries at [rdx+272]
; for four blocks and [rdx+336] for one; only xmm19's low lane is used).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_262:

; r13 is non-zero only on the partial path: XOR the shuffled partial
; block (xmm7) into the hash value so it is carried forward unmultiplied.
	or	r13,r13
	je	NEAR $L$_after_reduction_262
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_262:
	jmp	NEAR $L$_last_blocks_done_250
; ----------------------------------------------------------------------
; Tail case: seven 16-byte blocks are left. Four go through zmm0 and
; three (the third possibly partial) through the low lanes of zmm3.
; Values set up outside this block, as they are used here (roles are
; inferred from usage - confirm against the code that jumps here):
;   rcx       base of the AES round keys; 13 keys at [rcx+0 .. rcx+192]
;   r9+r11    source bytes; destination is [QWORD[rbp+120] + r11]
;   r13       byte count still to process
;   r15d      compared with 249 to choose the counter-increment path
;   zmm2      counter block(s); zmm27 / zmm28 are the increments added
;   zmm29     vpshufb control used for every byte shuffle below
;   zmm24-26  accumulators folded into the hash sums
;   rdx       context: [rdx+16] save slot, [rdx+240..336] hash multipliers
;   r8        receives the byte length of a trailing partial block
; Result: xmm14 holds the reduced hash value on exit.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_7_250:
; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries): the byte mask
; applied to the data that follows the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Fast path when r15d is below 249: add the increments directly.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_263
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_263

; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444,
; then shuffle the results back (presumably handles a wrap of the low
; counter byte - confirm).
$L$_16_blocks_overflow_263:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_263:




; Round-key 0 is broadcast to all lanes; the first pair of operands for
; the carry-less multiplies is fetched from the stack frame.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
; zmm2 <- lane 2 of zmm3 (the last counter block) replicated to all lanes.
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR the counter blocks with the first round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; From here AES rounds alternate with carry-less multiplies of the data at
; [rsp+512..767] by the values at [rsp+1280..1535] (presumably earlier
; blocks times hash-key powers - confirm). Four partial products each.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XOR (imm 0x96) merges the products of the first three groups.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the source: 64 bytes unmasked, then the tail under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold in the fourth group and the zmm24 / zmm25 / zmm26 accumulators.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
; Split the middle sum: its upper half goes to zmm14, lower half to zmm7.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
; Horizontal XOR of the four 128-bit lanes of zmm14 and of zmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Reduction with the POLY2 constant, first phase.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
; Reduction, second phase; xmm14 becomes the reduced hash of the old data.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
; Last AES round with the key at [rcx+192], then XOR with the source.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
; xmm11 keeps the last result lane as it is before masking.
	vextracti32x4	xmm11,zmm3,2
; Store: 64 bytes unmasked, the tail under k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes outside k1, then byte-shuffle the output for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
; xmm7 <- shuffled last block (used below only when it is partial).
	vextracti32x4	xmm7,zmm19,2
; r13 <- byte count belonging to the seventh block.
	sub	r13,16 * (7 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_264





; Seventh block is complete: store 0 at [r8] and hash all seven blocks.
; Multipliers: [rdx+240] for four blocks; [rdx+304] (32 bytes) with
; [rdx+336] inserted as lane 2 for the remaining three.
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

; Combine the middle terms, split them across high (zmm0) and low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; Horizontal XOR down to 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_264
$L$_small_initial_partial_block_264:








; Seventh block is partial: store its byte length at [r8], save xmm11 at
; [rdx+16], and hash only the six complete blocks (entries at [rdx+256]
; for four blocks and [rdx+320] for two; only ymm19 is multiplied).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_264:

; r13 is non-zero only on the partial path: XOR the shuffled partial
; block (xmm7) into the hash value so it is carried forward unmultiplied.
	or	r13,r13
	je	NEAR $L$_after_reduction_264
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_264:
	jmp	NEAR $L$_last_blocks_done_250
; ----------------------------------------------------------------------
; Tail case: eight 16-byte blocks are left. Four go through zmm0 and
; four (the last possibly partial) through zmm3.
; Values set up outside this block, as they are used here (roles are
; inferred from usage - confirm against the code that jumps here):
;   rcx       base of the AES round keys; 13 keys at [rcx+0 .. rcx+192]
;   r9+r11    source bytes; destination is [QWORD[rbp+120] + r11]
;   r13       byte count still to process
;   r15d      compared with 248 to choose the counter-increment path
;   zmm2      counter block(s); zmm27 / zmm28 are the increments added
;   zmm29     vpshufb control used for every byte shuffle below
;   zmm24-26  accumulators folded into the hash sums
;   rdx       context: [rdx+16] save slot, [rdx+224..336] hash multipliers
;   r8        receives the byte length of a trailing partial block
; Result: xmm14 holds the reduced hash value on exit.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_8_250:
; k1 = byte64_len_to_mask_table[r13 - 64] (8-byte entries): the byte mask
; applied to the data that follows the first 64 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
; Fast path when r15d is below 248: add the increments directly.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_265
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_265

; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444,
; then shuffle the results back (presumably handles a wrap of the low
; counter byte - confirm).
$L$_16_blocks_overflow_265:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_265:




; Round-key 0 is broadcast to all lanes; the first pair of operands for
; the carry-less multiplies is fetched from the stack frame.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
; zmm2 <- lane 3 of zmm3 (the last counter block) replicated to all lanes.
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR the counter blocks with the first round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; From here AES rounds alternate with carry-less multiplies of the data at
; [rsp+512..767] by the values at [rsp+1280..1535] (presumably earlier
; blocks times hash-key powers - confirm). Four partial products each.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XOR (imm 0x96) merges the products of the first three groups.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the source: 64 bytes unmasked, then the tail under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold in the fourth group and the zmm24 / zmm25 / zmm26 accumulators.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
; Split the middle sum: its upper half goes to zmm14, lower half to zmm7.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
; Horizontal XOR of the four 128-bit lanes of zmm14 and of zmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Reduction with the POLY2 constant, first phase.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
; Reduction, second phase; xmm14 becomes the reduced hash of the old data.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
; Last AES round with the key at [rcx+192], then XOR with the source.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
; xmm11 keeps the last result lane as it is before masking.
	vextracti32x4	xmm11,zmm3,3
; Store: 64 bytes unmasked, the tail under k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
; Zero the bytes outside k1, then byte-shuffle the output for hashing.
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
; xmm7 <- shuffled last block (used below only when it is partial).
	vextracti32x4	xmm7,zmm19,3
; r13 <- byte count belonging to the eighth block.
	sub	r13,16 * (8 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_266





; Eighth block is complete: store 0 at [r8] and hash all eight blocks,
; multiplying by the entries at [rdx+224] and [rdx+288] (four each).
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

; Combine the middle terms, split them across high (zmm0) and low (zmm3).
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
; Horizontal XOR down to 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_266
$L$_small_initial_partial_block_266:








; Eighth block is partial: store its byte length at [r8], save xmm11 at
; [rdx+16], and hash with the seven-block multipliers: [rdx+240] for four
; blocks; [rdx+304] (32 bytes) with [rdx+336] inserted as lane 2. Lane 3
; of zmm1 keeps its previous content at that point.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_266:

; r13 is non-zero only on the partial path: XOR the shuffled partial
; block (xmm7) into the hash value so it is carried forward unmultiplied.
	or	r13,r13
	je	NEAR $L$_after_reduction_266
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_266:
	jmp	NEAR $L$_last_blocks_done_250
; ----------------------------------------------------------------------
; Tail case: nine 16-byte blocks are left. Four go through zmm0, four
; through zmm3 and the ninth (possibly partial) through xmm4.
; Values set up outside this block, as they are used here (roles are
; inferred from usage - confirm against the code that jumps here):
;   rcx       base of the AES round keys; 13 keys at [rcx+0 .. rcx+192]
;   r9+r11    source bytes; destination is [QWORD[rbp+120] + r11]
;   r13       byte count still to process
;   r15d      compared with 247 to choose the counter-increment path
;   zmm2      counter block(s); zmm27 / zmm28 are the increments added
;   zmm29     vpshufb control used for every byte shuffle below
;   zmm24-26  accumulators folded into the hash sums
;   rdx       context: [rdx+16] save slot, [rdx+208..336] hash multipliers
;   r8        receives the byte length of a trailing partial block
; Result: xmm14 holds the reduced hash value on exit.
; ----------------------------------------------------------------------
$L$_last_num_blocks_is_9_250:
; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries): the byte mask
; applied to the data that follows the first 128 bytes.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
; Fast path when r15d is below 247: add the increments directly.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_267
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_267

; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444,
; then shuffle the results back (presumably handles a wrap of the low
; counter byte - confirm).
$L$_16_blocks_overflow_267:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_267:




; Round-key 0 is broadcast to all lanes; the first pair of operands for
; the carry-less multiplies is fetched from the stack frame.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
; zmm2 <- lane 0 of zmm4 (the last counter block) replicated to all lanes.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


; Round 0: XOR the counter blocks with the first round key.
	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


; From here AES rounds alternate with carry-less multiplies of the data at
; [rsp+512..767] by the values at [rsp+1280..1535] (presumably earlier
; blocks times hash-key powers - confirm). Four partial products each.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


; Three-way XOR (imm 0x96) merges the products of the first three groups.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
; Load the source: 128 bytes unmasked, then the tail under k1 with zeroing.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
; Fold in the fourth group and the zmm24 / zmm25 / zmm26 accumulators.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
; Split the middle sum: its upper half goes to zmm14, lower half to zmm7.
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
; Horizontal XOR of the four 128-bit lanes of zmm14 and of zmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
; Reduction with the POLY2 constant, first phase.
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
; Reduction, second phase; xmm14 becomes the reduced hash of the old data.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
; Last AES round with the key at [rcx+192], then XOR with the source.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
; xmm11 keeps the last result lane as it is before masking.
	vextracti32x4	xmm11,zmm4,0
; Store: 128 bytes unmasked, the tail under k1.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
; Zero the bytes outside k1, then byte-shuffle the output for hashing.
	vmovdqu8	zmm4{k1}{z},zmm4
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
; xmm7 <- shuffled last block (used below only when it is partial).
	vextracti32x4	xmm7,zmm20,0
; r13 <- byte count belonging to the ninth block.
	sub	r13,16 * (9 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_268





; Ninth block is complete: store 0 at [r8] and hash all nine blocks,
; multiplying by the entries at [rdx+208], [rdx+272] (four blocks each)
; and [rdx+336] (one block).
	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

; Combine the middle terms, split them across high (zmm0) and low (zmm3).
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
; Horizontal XOR down to 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
; Two-phase reduction with POLY2; result in xmm14.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_268
$L$_small_initial_partial_block_268:








; Ninth block is partial: store its byte length at [r8], save xmm11 at
; [rdx+16], and hash only the eight complete blocks (entries at [rdx+224]
; and [rdx+288], four blocks each).
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_268:

; r13 is non-zero only on the partial path: XOR the shuffled partial
; block (xmm7) into the hash value so it is carried forward unmultiplied.
	or	r13,r13
	je	NEAR $L$_after_reduction_268
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_268:
	jmp	NEAR $L$_last_blocks_done_250
; ---------------------------------------------------------------------
; Tail case: 10 counter blocks = two full 64-byte vectors (zmm0, zmm3)
; plus one k1-masked 32-byte vector (ymm4).
; Register usage visible in this case:
;   r9  + r11        base + offset used for the input loads
;   [120+rbp] + r11  base (reloaded into r10) + offset used for the stores
;   r13              byte count; reduced to the size of the 10th block
;   rcx              round keys, one per 16 bytes, offsets 0..192
;   rdx              table read at offsets 192..336; xmm11 saved at [16+rdx]
;   r8               receives 0 (full last block) or the leftover length
;   zmm2             counter block vector; left holding the 10th counter
;   zmm29            byte-shuffle mask applied to counters and output
;   xmm14            128-bit reduced value on exit
; NOTE(review): label numbering suggests generated code; confirm whether
; changes belong in a generator script rather than in this file.
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_10_250:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries).  k1 masks
	; the third vector; the first 128 bytes are moved unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; 246 = 256 - 10.
	; NOTE(review): presumably r15d mirrors the low counter byte, so this
	; detects a carry out of that byte when 10 blocks are added - confirm.
	cmp	r15d,246
	jae	NEAR $L$_16_blocks_overflow_269
	; No-carry path: derive 4 + 4 + 2 counter blocks by direct addition.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	ymm4,ymm3,ymm27
	jmp	NEAR $L$_16_blocks_ok_269

$L$_16_blocks_overflow_269:
	; Carry path: shuffle the counter with zmm29, add the ddq_add_1234 /
	; ddq_add_4444 constants, then shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	ymm4,ymm4,ymm29
$L$_16_blocks_ok_269:




	; From here AES rounds on zmm0/zmm3/ymm4 are interleaved with
	; carry-less multiplies of data staged on the stack.  Round key i is
	; broadcast from [rcx+16*i]; zmm30 and zmm31 alternate as key holder.
	; NOTE(review): [rsp+512..704] and [rsp+1280..1472] are presumably
	; earlier byte-reflected blocks and matching hash-key powers - confirm.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Broadcast lane 1 of zmm4 (the 10th counter block) to all of zmm2.
	vextracti32x4	xmm2,zmm4,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial key XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First staged pair: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third staged pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: fold pairs two and three into the sums.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, then up to 32 bytes under k1
	; with masked-off bytes zeroed.  zmm17/zmm19/zmm20 are free again here.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	ymm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge the cross terms and fold in zmm24/zmm25/zmm26.
	; NOTE(review): zmm24-zmm26 are presumably partial sums carried in
	; from code preceding this case - confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle sum: upper 8 bytes go to the high sum, lower 8
	; bytes to the low sum (per 128-bit lane).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14/xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first stage (on the low half).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	ymm4,ymm4,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	ymm4,ymm4,ymm31
	; Reduction, second stage; the result is XORed into xmm14.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round uses the key at [192+rcx] (11 vaesenc rounds precede).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	ymm4,ymm4,ymm30
	; XOR the keystream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	ymm4,ymm4,ymm20
	; Keep lane 1 (10th block) of the unmasked result in xmm11.
	vextracti32x4	xmm11,zmm4,1
	; r10 = store base pointer read from [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	YMMWORD[128+r11*1+r10]{k1},ymm4
	; Zero the bytes of zmm4 outside k1 before they are hashed.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output with zmm29 for the multiplies below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	ymm20,ymm4,ymm29
	; xmm7 = shuffled 10th block.
	vextracti32x4	xmm7,zmm20,1
	; r13 = bytes belonging to the 10th block.
	sub	r13,16 * (10 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_270





	; Full 10th block: clear the length at [r8] and multiply all 10
	; blocks by the table entries at [192+rdx], [256+rdx], [320+rdx].
	; NOTE(review): the rdx table presumably holds hash-key powers, with
	; the lowest power at offset 336 - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the running value (xmm14) into the first block.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Last two blocks use a 32-byte key load; the ymm-width multiplies
	; leave the upper 256 bits of their destinations zero.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them into the high/low sums, then XOR
	; the four lanes together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_270
$L$_small_initial_partial_block_270:








	; 10th block is shorter than 16 bytes: store its length at [r8] and
	; save xmm11 at [16+rdx].
	; NOTE(review): presumably state for finishing this block in a later
	; call - confirm against the caller.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	; Only the 9 complete blocks are multiplied, so every table offset
	; is 16 higher than on the full path (208 / 272 / 336).
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; 9th block only (lane 0 of zmm20), 16-byte key load.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_270:

	; If bytes of a partial block remain (r13 != 0), XOR the shuffled,
	; zero-padded last block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_270
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_270:
	jmp	NEAR $L$_last_blocks_done_250
; ---------------------------------------------------------------------
; Tail case: 11 counter blocks = two full 64-byte vectors (zmm0, zmm3)
; plus one k1-masked 64-byte vector (zmm4) of which three lanes are used.
; Register usage visible in this case:
;   r9  + r11        base + offset used for the input loads
;   [120+rbp] + r11  base (reloaded into r10) + offset used for the stores
;   r13              byte count; reduced to the size of the 11th block
;   rcx              round keys, one per 16 bytes, offsets 0..192
;   rdx              table read at offsets 176..336; xmm11 saved at [16+rdx]
;   r8               receives 0 (full last block) or the leftover length
;   zmm2             counter block vector; left holding the 11th counter
;   zmm29            byte-shuffle mask applied to counters and output
;   xmm14            128-bit reduced value on exit
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_11_250:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries); it masks
	; the third vector, the first 128 bytes being moved unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; 245 = 256 - 11.
	; NOTE(review): presumably r15d mirrors the low counter byte, so this
	; detects a carry out of that byte when 11 blocks are added - confirm.
	cmp	r15d,245
	jae	NEAR $L$_16_blocks_overflow_271
	; No-carry path: counters derived by direct addition.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_271

$L$_16_blocks_overflow_271:
	; Carry path: shuffle the counter with zmm29, add the ddq_add_1234 /
	; ddq_add_4444 constants, then shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_271:




	; AES rounds on zmm0/zmm3/zmm4 are interleaved with carry-less
	; multiplies of data staged on the stack.  Round key i is broadcast
	; from [rcx+16*i]; zmm30 and zmm31 alternate as key holder.
	; NOTE(review): [rsp+512..704] and [rsp+1280..1472] are presumably
	; earlier byte-reflected blocks and matching hash-key powers - confirm.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Broadcast lane 2 of zmm4 (the 11th counter block) to all of zmm2.
	vextracti32x4	xmm2,zmm4,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial key XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First staged pair: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third staged pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: fold pairs two and three into the sums.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, the rest under k1 with
	; masked-off bytes zeroed.  zmm17/zmm19/zmm20 are free again here.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge the cross terms and fold in zmm24/zmm25/zmm26.
	; NOTE(review): zmm24-zmm26 are presumably partial sums carried in
	; from code preceding this case - confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle sum between the high and low sums (per lane).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14/xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first stage (on the low half).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; Reduction, second stage; the result is XORed into xmm14.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round uses the key at [192+rcx] (11 vaesenc rounds precede).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the keystream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; Keep lane 2 (11th block) of the unmasked result in xmm11.
	vextracti32x4	xmm11,zmm4,2
	; r10 = store base pointer read from [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 outside k1 before they are hashed.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output with zmm29 for the multiplies below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = shuffled 11th block.
	vextracti32x4	xmm7,zmm20,2
	; r13 = bytes belonging to the 11th block.
	sub	r13,16 * (11 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_272





	; Full 11th block: clear the length at [r8] and multiply all 11
	; blocks by the table entries starting at [176+rdx].
	; NOTE(review): the rdx table presumably holds hash-key powers, with
	; the lowest power at offset 336 - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the running value (xmm14) into the first block.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Three-lane key vector: lanes 0-1 from [304+rdx], lane 2 from
	; [336+rdx]; lane 3 stays zero from the ymm-width load.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them into the high/low sums, then XOR
	; the four lanes together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_272
$L$_small_initial_partial_block_272:








	; 11th block is shorter than 16 bytes: store its length at [r8] and
	; save xmm11 at [16+rdx].
	; NOTE(review): presumably state for finishing this block in a later
	; call - confirm against the caller.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	; Only the 10 complete blocks are multiplied, so every table offset
	; is 16 higher than on the full path (192 / 256 / 320).
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; 9th and 10th blocks only (lanes 0-1 of zmm20), 32-byte key load.
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm20,ymm1,0x01
	vpclmulqdq	ymm5,ymm20,ymm1,0x10
	vpclmulqdq	ymm0,ymm20,ymm1,0x11
	vpclmulqdq	ymm3,ymm20,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_272:

	; If bytes of a partial block remain (r13 != 0), XOR the shuffled,
	; zero-padded last block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_272
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_272:
	jmp	NEAR $L$_last_blocks_done_250
; ---------------------------------------------------------------------
; Tail case: 12 counter blocks = three 64-byte vectors (zmm0, zmm3, zmm4),
; the third one loaded and stored under mask k1.
; Register usage visible in this case:
;   r9  + r11        base + offset used for the input loads
;   [120+rbp] + r11  base (reloaded into r10) + offset used for the stores
;   r13              byte count; reduced to the size of the 12th block
;   rcx              round keys, one per 16 bytes, offsets 0..192
;   rdx              table read at offsets 160..336; xmm11 saved at [16+rdx]
;   r8               receives 0 (full last block) or the leftover length
;   zmm2             counter block vector; left holding the 12th counter
;   zmm29            byte-shuffle mask applied to counters and output
;   xmm14            128-bit reduced value on exit
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_12_250:
	; k1 = byte64_len_to_mask_table[r13 - 128] (8-byte entries); it masks
	; the third vector, the first 128 bytes being moved unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; 244 = 256 - 12.
	; NOTE(review): presumably r15d mirrors the low counter byte, so this
	; detects a carry out of that byte when 12 blocks are added - confirm.
	cmp	r15d,244
	jae	NEAR $L$_16_blocks_overflow_273
	; No-carry path: counters derived by direct addition.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	jmp	NEAR $L$_16_blocks_ok_273

$L$_16_blocks_overflow_273:
	; Carry path: shuffle the counter with zmm29, add the ddq_add_1234 /
	; ddq_add_4444 constants, then shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
$L$_16_blocks_ok_273:




	; AES rounds on zmm0/zmm3/zmm4 are interleaved with carry-less
	; multiplies of data staged on the stack.  Round key i is broadcast
	; from [rcx+16*i]; zmm30 and zmm31 alternate as key holder.
	; NOTE(review): [rsp+512..704] and [rsp+1280..1472] are presumably
	; earlier byte-reflected blocks and matching hash-key powers - confirm.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Broadcast lane 3 of zmm4 (the 12th counter block) to all of zmm2.
	vextracti32x4	xmm2,zmm4,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial key XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First staged pair: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third staged pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: fold pairs two and three into the sums.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 128 bytes unmasked, the rest under k1 with
	; masked-off bytes zeroed.  zmm17/zmm19/zmm20 are free again here.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20{k1}{z},[128+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge the cross terms and fold in zmm24/zmm25/zmm26.
	; NOTE(review): zmm24-zmm26 are presumably partial sums carried in
	; from code preceding this case - confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle sum between the high and low sums (per lane).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14/xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first stage (on the low half).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	; Reduction, second stage; the result is XORed into xmm14.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round uses the key at [192+rcx] (11 vaesenc rounds precede).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	; XOR the keystream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	; Keep lane 3 (12th block) of the unmasked result in xmm11.
	vextracti32x4	xmm11,zmm4,3
	; r10 = store base pointer read from [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10]{k1},zmm4
	; Zero the bytes of zmm4 outside k1 before they are hashed.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-shuffle the output with zmm29 for the multiplies below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	; xmm7 = shuffled 12th block.
	vextracti32x4	xmm7,zmm20,3
	; r13 = bytes belonging to the 12th block.
	sub	r13,16 * (12 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_274





	; Full 12th block: clear the length at [r8] and multiply all 12
	; blocks by three full 64-byte table entries (160 / 224 / 288).
	; NOTE(review): the rdx table presumably holds hash-key powers, with
	; the lowest power at offset 336 - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the running value (xmm14) into the first block.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Third vector: zmm17/zmm19 are reused as temporaries and each
	; product is folded into the running sums with a three-way XOR.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Combine cross terms, split them into the high/low sums, then XOR
	; the four lanes together.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_274
$L$_small_initial_partial_block_274:








	; 12th block is shorter than 16 bytes: store its length at [r8] and
	; save xmm11 at [16+rdx].
	; NOTE(review): presumably state for finishing this block in a later
	; call - confirm against the caller.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	; Only the 11 complete blocks are multiplied, so every table offset
	; is 16 higher than on the full path (176 / 240 / 304+336).
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Three-lane key vector: lanes 0-1 from [304+rdx], lane 2 from
	; [336+rdx]; lane 3 stays zero, so the partial block in lane 3 of
	; zmm20 contributes nothing to these products.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm20,zmm1,0x01
	vpclmulqdq	zmm5,zmm20,zmm1,0x10
	vpclmulqdq	zmm0,zmm20,zmm1,0x11
	vpclmulqdq	zmm3,zmm20,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_274:

	; If bytes of a partial block remain (r13 != 0), XOR the shuffled,
	; zero-padded last block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_274
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_274:
	jmp	NEAR $L$_last_blocks_done_250
; ---------------------------------------------------------------------
; Tail case: 13 counter blocks = three full 64-byte vectors (zmm0, zmm3,
; zmm4) plus one k1-masked 16-byte vector (xmm5).
; Register usage visible in this case:
;   r9  + r11        base + offset used for the input loads
;   [120+rbp] + r11  base (reloaded into r10) + offset used for the stores
;   r13              byte count; reduced to the size of the 13th block
;   rcx              round keys, one per 16 bytes, offsets 0..192
;   rdx              table read at offsets 144..336; xmm11 saved at [16+rdx]
;   r8               receives 0 (full last block) or the leftover length
;   zmm2             counter block vector; left holding the 13th counter
;   zmm29            byte-shuffle mask applied to counters and output
;   xmm14            128-bit reduced value on exit
; ---------------------------------------------------------------------
$L$_last_num_blocks_is_13_250:
	; k1 = byte64_len_to_mask_table[r13 - 192] (8-byte entries); it masks
	; the fourth vector, the first 192 bytes being moved unmasked.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; 243 = 256 - 13.
	; NOTE(review): presumably r15d mirrors the low counter byte, so this
	; detects a carry out of that byte when 13 blocks are added - confirm.
	cmp	r15d,243
	jae	NEAR $L$_16_blocks_overflow_275
	; No-carry path: derive 4 + 4 + 4 + 1 counter blocks by addition.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	xmm5,xmm4,xmm27
	jmp	NEAR $L$_16_blocks_ok_275

$L$_16_blocks_overflow_275:
	; Carry path: shuffle the counter with zmm29, add the ddq_add_1234 /
	; ddq_add_4444 constants, then shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	xmm5,xmm5,xmm29
$L$_16_blocks_ok_275:




	; AES rounds on zmm0/zmm3/zmm4/xmm5 are interleaved with carry-less
	; multiplies of data staged on the stack.  Round key i is broadcast
	; from [rcx+16*i]; zmm30 and zmm31 alternate as key holder.
	; NOTE(review): [rsp+512..704] and [rsp+1280..1472] are presumably
	; earlier byte-reflected blocks and matching hash-key powers - confirm.
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; Broadcast lane 0 of zmm5 (the 13th counter block) to all of zmm2.
	vextracti32x4	xmm2,zmm5,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; Initial key XOR with the key at [rcx].
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First staged pair: zmm14 = hi*hi, zmm7 = lo*lo, zmm10/zmm11 = cross.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third staged pair.
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: fold pairs two and three into the sums.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: 192 bytes unmasked, then up to 16 bytes under k1
	; with masked-off bytes zeroed.  zmm17/19/20/21 are free again here.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	xmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth staged pair.
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Merge the cross terms and fold in zmm24/zmm25/zmm26.
	; NOTE(review): zmm24-zmm26 are presumably partial sums carried in
	; from code preceding this case - confirm.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle sum between the high and low sums (per lane).
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14/xmm7.
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first stage (on the low half).
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	xmm5,xmm5,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	xmm5,xmm5,xmm31
	; Reduction, second stage; the result is XORed into xmm14.
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	; Final AES round uses the key at [192+rcx] (11 vaesenc rounds precede).
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	xmm5,xmm5,xmm30
	; XOR the keystream with the loaded input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	xmm5,xmm5,xmm21
	; Keep lane 0 of zmm5 (13th block) of the unmasked result in xmm11.
	vextracti32x4	xmm11,zmm5,0
	; r10 = store base pointer read from [120+rbp].
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	XMMWORD[192+r11*1+r10]{k1},xmm5
	; Zero the bytes of zmm5 outside k1 before they are hashed.
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 for the multiplies below.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	xmm21,xmm5,xmm29
	; xmm7 = shuffled 13th block.
	vextracti32x4	xmm7,zmm21,0
	; r13 = bytes belonging to the 13th block.
	sub	r13,16 * (13 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_276





	; Full 13th block: clear the length at [r8] and multiply all 13
	; blocks by the table entries at 144 / 208 / 272 (64 bytes each)
	; and 336 (16 bytes).
	; NOTE(review): the rdx table presumably holds hash-key powers, with
	; the lowest power at offset 336 - confirm.
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the running value (xmm14) into the first block.
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; Third vector: zmm17/zmm19 are reused as temporaries and each
	; product is folded into the running sums with a three-way XOR.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; 13th block with a 16-byte key load; the xmm-width multiplies leave
	; the upper 384 bits of their destinations zero.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them into the high/low sums, then XOR
	; the four lanes together.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_276
$L$_small_initial_partial_block_276:








	; 13th block is shorter than 16 bytes: store its length at [r8] and
	; save xmm11 at [16+rdx].
	; NOTE(review): presumably state for finishing this block in a later
	; call - confirm against the caller.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	; Only the 12 complete blocks are multiplied, so every table offset
	; is 16 higher than on the full path (160 / 224 / 288) and the
	; 16-byte fourth vector is not multiplied at all.
	vmovdqu64	zmm1,ZMMWORD[160+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96

	; Combine cross terms, split them into the high/low sums, then XOR
	; the four lanes together.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Two-stage reduction with POLY2; result lands in xmm14.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_276:

	; If bytes of a partial block remain (r13 != 0), XOR the shuffled,
	; zero-padded last block (xmm7) into xmm14 without multiplying it.
	or	r13,r13
	je	NEAR $L$_after_reduction_276
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_276:
	jmp	NEAR $L$_last_blocks_done_250
$L$_last_num_blocks_is_14_250:
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	cmp	r15d,242
	jae	NEAR $L$_16_blocks_overflow_277
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	ymm5,ymm4,ymm27
	jmp	NEAR $L$_16_blocks_ok_277

$L$_16_blocks_overflow_277:
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	ymm5,ymm5,ymm29
$L$_16_blocks_ok_277:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	vextracti32x4	xmm2,zmm5,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	ymm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	ymm5,ymm5,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	ymm5,ymm5,ymm31
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	ymm5,ymm5,ymm30
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	ymm5,ymm5,ymm21
	vextracti32x4	xmm11,zmm5,1
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	YMMWORD[192+r11*1+r10]{k1},ymm5
	vmovdqu8	zmm5{k1}{z},zmm5
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	ymm21,ymm5,ymm29
	vextracti32x4	xmm7,zmm21,1
	sub	r13,16 * (14 - 1)


	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_278





	sub	r13,16
	mov	QWORD[r8],0
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_278
$L$_small_initial_partial_block_278:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[144+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm21,xmm1,0x01
	vpclmulqdq	xmm5,xmm21,xmm1,0x10
	vpclmulqdq	xmm0,xmm21,xmm1,0x11
	vpclmulqdq	xmm3,xmm21,xmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_278:

	or	r13,r13
	je	NEAR $L$_after_reduction_278
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_278:
	jmp	NEAR $L$_last_blocks_done_250
; Tail case entered through the "15 blocks" label: build four vectors of
; counter blocks in zmm0/zmm3/zmm4/zmm5 starting from zmm2, and load the
; byte mask k1 for the last (4th) 64-byte vector.
$L$_last_num_blocks_is_15_250:
	; k1 = 8-byte entry (r13 - 192) of byte64_len_to_mask_table
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 241 takes the slow path below.
	; NOTE(review): presumably r15d tracks the low counter byte and
	; 241 = 256 - 15 detects an 8-bit wrap within these blocks - confirm.
	cmp	r15d,241
	jae	NEAR $L$_16_blocks_overflow_279
	; Fast path: add zmm28 once, then zmm27 three times, without reshuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_279

$L$_16_blocks_overflow_279:
	; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 and then
	; ddq_add_4444 per following vector, and shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; Encrypt the counter vectors zmm0/zmm3/zmm4/zmm5 while, interleaved with the
; AES rounds, carry-less multiplying four stack vector pairs
; ([1280..1472+rsp] x [512..704+rsp]) and folding in zmm24/zmm25/zmm26.
; 13 round keys are read from rcx+0 .. rcx+192: one whitening XOR, eleven
; vaesenc rounds and one vaesenclast.  zmm30/zmm31 alternate as round-key
; registers, each reloaded with a later key while the other is in use.
; NOTE(review): the stack vectors are presumably earlier byte-reflected
; blocks and matching hash-key powers - confirm against the code that
; fills them.
$L$_16_blocks_ok_279:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = lane 2 of zmm5 replicated to all four lanes
	vextracti32x4	xmm2,zmm5,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; XOR with round key 0
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First pair: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; vpternlogq with imm 0x96 is a three-way XOR: sum the partial products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load source data from r9+r11; the 4th vector is k1-masked, zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle)
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term across the high (zmm14) and low (zmm7) halves
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first step (on the low half)
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Reduction, second step
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	; xmm14 = reduced 128-bit hash value
	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Output = AES result XOR source data
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = lane 2 of the unmasked last output vector
	vextracti32x4	xmm11,zmm5,2
	; Destination base pointer is read from [rbp+120]; 4th store is k1-masked
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside k1 before hashing
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 into zmm17/19/20/21 for hashing
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled last block (lane 2 of zmm21)
	vextracti32x4	xmm7,zmm21,2
	; r13 = byte count left for the last block
	sub	r13,16 * (15 - 1)


	; Fewer than 16 bytes in the last block: take the partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_280





	; Full last block: store 0 at [r8] and hash all 15 blocks
	sub	r13,16
	mov	QWORD[r8],0
	; Fold the running hash xmm14 into the first vector
	vpxorq	zmm17,zmm17,zmm14
	; Multiply by the 15 table entries at rdx+112 .. rdx+336
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Last three entries: two from rdx+304 (ymm load), one from rdx+336 (lane 2)
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them over the high/low halves, fold lanes
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_280
; Partial last block (r13 < 16): record its length at [r8], save xmm11 at
; rdx+16, and hash only the first 14 blocks with the 14 table entries at
; rdx+128 .. rdx+336.  The shuffled last block in xmm7 is XORed into the
; result afterwards instead of being multiplied here.
; NOTE(review): rdx+16 is presumably the context slot holding the last
; output block for later completion - confirm against the context layout.
$L$_small_initial_partial_block_280:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Fold the running hash xmm14 into the first vector
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[128+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[192+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Only the two low blocks of zmm21 are multiplied (ymm-wide)
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm21,ymm1,0x01
	vpclmulqdq	ymm5,ymm21,ymm1,0x10
	vpclmulqdq	ymm0,ymm21,ymm1,0x11
	vpclmulqdq	ymm3,ymm21,ymm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them over the high/low halves, fold lanes
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_280:

	; Both paths join here: when r13 != 0, XOR the shuffled last block
	; (xmm7) into the hash; when r13 == 0 it was already hashed in full.
	or	r13,r13
	je	NEAR $L$_after_reduction_280
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_280:
	jmp	NEAR $L$_last_blocks_done_250
; Tail case entered through the "16 blocks" label: build four vectors of
; counter blocks in zmm0/zmm3/zmm4/zmm5 starting from zmm2, and load the
; byte mask k1 for the last (4th) 64-byte vector.
$L$_last_num_blocks_is_16_250:
	; k1 = 8-byte entry (r13 - 192) of byte64_len_to_mask_table
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,192
	kmovq	k1,[rax*8+r10]
	; r15d >= 240 takes the slow path below.
	; NOTE(review): presumably r15d tracks the low counter byte and
	; 240 = 256 - 16 detects an 8-bit wrap within these blocks - confirm.
	cmp	r15d,240
	jae	NEAR $L$_16_blocks_overflow_281
	; Fast path: add zmm28 once, then zmm27 three times, without reshuffling.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	zmm4,zmm3,zmm27
	vpaddd	zmm5,zmm4,zmm27
	jmp	NEAR $L$_16_blocks_ok_281

$L$_16_blocks_overflow_281:
	; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 and then
	; ddq_add_4444 per following vector, and shuffle each result back.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpaddd	zmm5,zmm4,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	vpshufb	zmm4,zmm4,zmm29
	vpshufb	zmm5,zmm5,zmm29
; Encrypt the counter vectors zmm0/zmm3/zmm4/zmm5 while, interleaved with the
; AES rounds, carry-less multiplying four stack vector pairs
; ([1280..1472+rsp] x [512..704+rsp]) and folding in zmm24/zmm25/zmm26.
; 13 round keys are read from rcx+0 .. rcx+192: one whitening XOR, eleven
; vaesenc rounds and one vaesenclast.  zmm30/zmm31 alternate as round-key
; registers.  Execution falls through into the next label at the end.
; NOTE(review): the stack vectors are presumably earlier byte-reflected
; blocks and matching hash-key powers - confirm.
$L$_16_blocks_ok_281:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vmovdqa64	zmm8,ZMMWORD[1280+rsp]
	vmovdqu64	zmm1,ZMMWORD[512+rsp]
	; zmm2 = lane 3 of zmm5 replicated to all four lanes
	vextracti32x4	xmm2,zmm5,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[576+rsp]
	vmovdqa64	zmm22,ZMMWORD[1344+rsp]
	; XOR with round key 0
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First pair: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[640+rsp]
	vmovdqa64	zmm8,ZMMWORD[1408+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[704+rsp]
	vmovdqa64	zmm22,ZMMWORD[1472+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; vpternlogq with imm 0x96 is a three-way XOR: sum the partial products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load source data from r9+r11; the 4th vector is k1-masked, zero-filled
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	zmm20,ZMMWORD[128+r11*1+r9]
	vmovdqu8	zmm21{k1}{z},[192+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Fold in the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle)
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpternlogq	zmm14,zmm24,zmm12,0x96
	vpternlogq	zmm7,zmm25,zmm13,0x96
	vpternlogq	zmm10,zmm26,zmm15,0x96
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	; Split the middle term across the high (zmm14) and low (zmm7) halves
	vpsrldq	zmm15,zmm10,8
	vpslldq	zmm10,zmm10,8

	vmovdqa64	xmm16,XMMWORD[POLY2]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vpxorq	zmm14,zmm14,zmm15
	vpxorq	zmm7,zmm7,zmm10
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7
	vextracti64x4	ymm12,zmm14,1
	vpxorq	ymm14,ymm14,ymm12
	vextracti32x4	xmm12,ymm14,1
	vpxorq	xmm14,xmm14,xmm12
	vextracti64x4	ymm13,zmm7,1
	vpxorq	ymm7,ymm7,ymm13
	vextracti32x4	xmm13,ymm7,1
	vpxorq	xmm7,xmm7,xmm13
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	; Reduction with the POLY2 constant, first step (on the low half)
	vpclmulqdq	xmm13,xmm16,xmm7,0x01
	vpslldq	xmm13,xmm13,8
	vpxorq	xmm13,xmm7,xmm13
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	zmm4,zmm4,zmm30
	vaesenc	zmm5,zmm5,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	zmm4,zmm4,zmm31
	vaesenc	zmm5,zmm5,zmm31
	; Reduction, second step
	vpclmulqdq	xmm12,xmm16,xmm13,0x00
	vpsrldq	xmm12,xmm12,4
	vpclmulqdq	xmm15,xmm16,xmm13,0x10
	vpslldq	xmm15,xmm15,4

	; xmm14 = reduced 128-bit hash value
	vpternlogq	xmm14,xmm15,xmm12,0x96
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	zmm4,zmm4,zmm30
	vaesenclast	zmm5,zmm5,zmm30
	; Output = AES result XOR source data
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	zmm4,zmm4,zmm20
	vpxorq	zmm5,zmm5,zmm21
	; xmm11 = lane 3 of the unmasked last output vector
	vextracti32x4	xmm11,zmm5,3
	; Destination base pointer is read from [rbp+120]; 4th store is k1-masked
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	vmovdqu8	ZMMWORD[128+r11*1+r10],zmm4
	vmovdqu8	ZMMWORD[192+r11*1+r10]{k1},zmm5
	; Zero the bytes of zmm5 outside k1 before hashing
	vmovdqu8	zmm5{k1}{z},zmm5
	; Byte-shuffle the output with zmm29 into zmm17/19/20/21 for hashing
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	zmm20,zmm4,zmm29
	vpshufb	zmm21,zmm5,zmm29
	; xmm7 = shuffled last block (lane 3 of zmm21)
	vextracti32x4	xmm7,zmm21,3
	; r13 = byte count left for the last block; no full/partial branch here
	sub	r13,16 * (16 - 1)
; Final hash for the 16-block case.  Reached by fall-through from the code
; above: r13 is stored at [r8], xmm11 is saved at rdx+16, the first 15 blocks
; are multiplied by the 15 table entries at rdx+112 .. rdx+336, and the
; shuffled 16th block (xmm7) is XORed into the result unconditionally.
; NOTE(review): rdx+16 is presumably the context slot holding the last
; output block - confirm against the context layout.
$L$_small_initial_partial_block_282:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Fold the running hash xmm14 into the first vector
	vpxorq	zmm17,zmm17,zmm14
	vmovdqu64	zmm1,ZMMWORD[112+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[176+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm17,zmm20,zmm1,0x11
	vpclmulqdq	zmm19,zmm20,zmm1,0x00
	vpternlogq	zmm8,zmm17,zmm0,0x96
	vpternlogq	zmm22,zmm19,zmm3,0x96
	vpclmulqdq	zmm17,zmm20,zmm1,0x01
	vpclmulqdq	zmm19,zmm20,zmm1,0x10
	vpternlogq	zmm30,zmm17,zmm4,0x96
	vpternlogq	zmm31,zmm19,zmm5,0x96
	; Last three entries: two from rdx+304 (ymm load), one from rdx+336
	; (lane 2); lane 3 of zmm1 stays zero so the 16th block contributes 0
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm21,zmm1,0x01
	vpclmulqdq	zmm5,zmm21,zmm1,0x10
	vpclmulqdq	zmm0,zmm21,zmm1,0x11
	vpclmulqdq	zmm3,zmm21,zmm1,0x00

	vpxorq	zmm4,zmm4,zmm30
	vpxorq	zmm5,zmm5,zmm31
	vpxorq	zmm0,zmm0,zmm8
	vpxorq	zmm3,zmm3,zmm22

	; Combine cross terms, split them over the high/low halves, fold lanes
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_282:
	; XOR the shuffled last block into the hash
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_282:
	jmp	NEAR $L$_last_blocks_done_250
; Tail case entered through the "0 blocks" label: no AES work.  The four
; stack vectors at [1280..1472+rsp] are carry-less multiplied by the four at
; [512..704+rsp], added to the sums carried in zmm24 (high), zmm25 (low) and
; zmm26 (middle), and the total is reduced with POLY2 into xmm14.
; Execution then falls through into the following label.
; NOTE(review): the stack vectors are presumably earlier byte-reflected
; blocks and matching hash-key powers - confirm.
$L$_last_num_blocks_is_0_250:
	; Pairs 1 and 2
	vmovdqa64	zmm13,ZMMWORD[1280+rsp]
	vmovdqu64	zmm12,ZMMWORD[512+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1344+rsp]
	vmovdqu64	zmm12,ZMMWORD[576+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10
	; imm 0x96 = three-way XOR: accumulate into zmm24/zmm25/zmm26
	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96
	; Pairs 3 and 4
	vmovdqa64	zmm13,ZMMWORD[1408+rsp]
	vmovdqu64	zmm12,ZMMWORD[640+rsp]
	vpclmulqdq	zmm0,zmm13,zmm12,0x11
	vpclmulqdq	zmm3,zmm13,zmm12,0x00
	vpclmulqdq	zmm4,zmm13,zmm12,0x01
	vpclmulqdq	zmm5,zmm13,zmm12,0x10
	vmovdqa64	zmm13,ZMMWORD[1472+rsp]
	vmovdqu64	zmm12,ZMMWORD[704+rsp]
	vpclmulqdq	zmm6,zmm13,zmm12,0x11
	vpclmulqdq	zmm7,zmm13,zmm12,0x00
	vpclmulqdq	zmm10,zmm13,zmm12,0x01
	vpclmulqdq	zmm11,zmm13,zmm12,0x10

	vpternlogq	zmm26,zmm4,zmm10,0x96
	vpternlogq	zmm24,zmm0,zmm6,0x96
	vpternlogq	zmm25,zmm3,zmm7,0x96
	vpternlogq	zmm26,zmm5,zmm11,0x96

	; Split the middle term over the high/low halves, then fold the lanes
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm4,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm0,xmm4,xmm25,0x01
	vpslldq	xmm0,xmm0,8
	vpxorq	xmm0,xmm25,xmm0


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm3,xmm4,xmm0,0x00
	vpsrldq	xmm3,xmm3,4
	vpclmulqdq	xmm14,xmm4,xmm0,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm3,xmm24,0x96

; Common exit of the "_250" tail cases: byte-shuffle xmm2 with xmm29 and
; continue at the shared GHASH-done label.
; NOTE(review): xmm2 presumably holds the last counter block, converted
; here to the byte order expected by the code that follows - confirm.
$L$_last_blocks_done_250:
	vpshufb	xmm2,xmm2,xmm29
	jmp	NEAR $L$_ghash_done_172

; Entry for the "below 32 blocks" path: step past 256 bytes (r13 -= 256,
; r11 += 256) and, when r14 == 0, fill four more 64-byte stack table
; vectors at [448+rsp], [384+rsp], [320+rsp] and [256+rsp].
; Each new vector is an existing one multiplied (carry-less, then reduced
; with POLY2) by lane 0 of [640+rsp] broadcast to every lane:
;   [448] = [576]*m, [384] = [512]*m, [320] = [448]*m, [256] = [384]*m.
; NOTE(review): these are presumably further powers of the hash key, and
; r14 a "table already extended" flag - confirm.
$L$_message_below_32_blocks_172:


	sub	r13,256
	add	r11,256
	; r10d keeps a copy of the remaining length for the code that follows
	mov	r10d,r13d
	test	r14,r14
	jnz	NEAR $L$_skip_hkeys_precomputation_283
	vmovdqu64	zmm3,ZMMWORD[640+rsp]


	; zmm3 = multiplier m: lane 0 of [640+rsp] in all four lanes
	vshufi64x2	zmm3,zmm3,zmm3,0x00

	vmovdqu64	zmm4,ZMMWORD[576+rsp]
	vmovdqu64	zmm5,ZMMWORD[512+rsp]

	; zmm4 = zmm4 * m  (high part in zmm6, low in zmm7, cross terms in zmm4/zmm10)
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	; zmm10 was used as scratch above, so POLY2 is reloaded for each product
	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[448+rsp],zmm4

	; zmm5 = zmm5 * m
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[384+rsp],zmm5

	; zmm4 = (vector just stored at 448) * m
	vpclmulqdq	zmm6,zmm4,zmm3,0x11
	vpclmulqdq	zmm7,zmm4,zmm3,0x00
	vpclmulqdq	zmm10,zmm4,zmm3,0x01
	vpclmulqdq	zmm4,zmm4,zmm3,0x10
	vpxorq	zmm4,zmm4,zmm10

	vpsrldq	zmm10,zmm4,8
	vpslldq	zmm4,zmm4,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm4,zmm4,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm4,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm4,zmm4,zmm7



	vpclmulqdq	zmm7,zmm10,zmm4,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm4,zmm10,zmm4,0x10
	vpslldq	zmm4,zmm4,4

	vpternlogq	zmm4,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[320+rsp],zmm4

	; zmm5 = (vector just stored at 384) * m
	vpclmulqdq	zmm6,zmm5,zmm3,0x11
	vpclmulqdq	zmm7,zmm5,zmm3,0x00
	vpclmulqdq	zmm10,zmm5,zmm3,0x01
	vpclmulqdq	zmm5,zmm5,zmm3,0x10
	vpxorq	zmm5,zmm5,zmm10

	vpsrldq	zmm10,zmm5,8
	vpslldq	zmm5,zmm5,8
	vpxorq	zmm6,zmm6,zmm10
	vpxorq	zmm5,zmm5,zmm7



	vmovdqu64	zmm10,ZMMWORD[POLY2]

	vpclmulqdq	zmm7,zmm10,zmm5,0x01
	vpslldq	zmm7,zmm7,8
	vpxorq	zmm5,zmm5,zmm7



	vpclmulqdq	zmm7,zmm10,zmm5,0x00
	vpsrldq	zmm7,zmm7,4
	vpclmulqdq	zmm5,zmm10,zmm5,0x10
	vpslldq	zmm5,zmm5,4

	vpternlogq	zmm5,zmm6,zmm7,0x96

	vmovdqu64	ZMMWORD[256+rsp],zmm5
; Dispatch on the number of remaining 16-byte blocks.
;   r14  = 1 (set unconditionally here)
;   ebx  = 512 - (r13d & ~15); later used as a byte offset from rsp
;   r10d = (r13d + 15) >> 4, i.e. remaining bytes rounded up to whole blocks
; r10d must hold r13d on entry (it is copied before this label is reached).
; The compare tree branches to one of the _0 .. _16 case labels; the
; one-block case is reached by falling off the end of this block.
$L$_skip_hkeys_precomputation_283:
	mov	r14,1
	and	r10d,~15
	mov	ebx,512
	sub	ebx,r10d
	mov	r10d,r13d
	add	r10d,15
	; shr sets ZF when the block count is zero
	shr	r10d,4
	je	NEAR $L$_last_num_blocks_is_0_284

	; Split at 8: equal, below (1..7), or above (9..16)
	cmp	r10d,8
	je	NEAR $L$_last_num_blocks_is_8_284
	jb	NEAR $L$_last_num_blocks_is_7_1_284


	; 9..16: split at 12
	cmp	r10d,12
	je	NEAR $L$_last_num_blocks_is_12_284
	jb	NEAR $L$_last_num_blocks_is_11_9_284


	; 13..16
	cmp	r10d,15
	je	NEAR $L$_last_num_blocks_is_15_284
	ja	NEAR $L$_last_num_blocks_is_16_284
	cmp	r10d,14
	je	NEAR $L$_last_num_blocks_is_14_284
	jmp	NEAR $L$_last_num_blocks_is_13_284

$L$_last_num_blocks_is_11_9_284:

	; 9..11
	cmp	r10d,10
	je	NEAR $L$_last_num_blocks_is_10_284
	ja	NEAR $L$_last_num_blocks_is_11_284
	jmp	NEAR $L$_last_num_blocks_is_9_284

$L$_last_num_blocks_is_7_1_284:
	; 1..7: split at 4
	cmp	r10d,4
	je	NEAR $L$_last_num_blocks_is_4_284
	jb	NEAR $L$_last_num_blocks_is_3_1_284

	; 5..7
	cmp	r10d,6
	ja	NEAR $L$_last_num_blocks_is_7_284
	je	NEAR $L$_last_num_blocks_is_6_284
	jmp	NEAR $L$_last_num_blocks_is_5_284

$L$_last_num_blocks_is_3_1_284:

	; 1..3; a count of 1 falls through to the code after this block
	cmp	r10d,2
	ja	NEAR $L$_last_num_blocks_is_3_284
	je	NEAR $L$_last_num_blocks_is_2_284
; Tail case for one remaining block: load byte mask k1 and build a single
; counter block in xmm0 from zmm2.
$L$_last_num_blocks_is_1_284:
	; k1 = 8-byte entry r13 of byte64_len_to_mask_table
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d >= 255 takes the slow path below.
	; NOTE(review): presumably r15d tracks the low counter byte and this
	; detects an 8-bit wrap when adding one block - confirm.
	cmp	r15d,255
	jae	NEAR $L$_16_blocks_overflow_285
	; Fast path: add xmm28 directly
	vpaddd	xmm0,xmm2,xmm28
	jmp	NEAR $L$_16_blocks_ok_285

$L$_16_blocks_overflow_285:
	; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234, shuffle the
	; low block back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	xmm0,xmm0,xmm29
; Encrypt one counter block (xmm0) while, interleaved with the AES rounds,
; carry-less multiplying four stack vectors at [768..960+rsp] by the four
; 64-byte vectors starting at rsp+rbx.  The running hash xmm14 is XORed
; into the first stack vector before multiplying.  The un-reduced sums are
; left in zmm24 (high), zmm25 (low) and zmm26 (middle).
; 13 round keys are read from rcx+0 .. rcx+192: one whitening XOR, eleven
; vaesenc rounds and one vaesenclast.
; NOTE(review): the stack vectors are presumably earlier byte-reflected
; blocks and matching hash-key powers selected through rbx - confirm.
$L$_16_blocks_ok_285:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 0 of zmm0 replicated to all four lanes
	vextracti32x4	xmm2,zmm0,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR with round key 0
	vpxorq	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First pair: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: sum the partial products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source block from r9+r11, k1-masked and zero-filled
	vmovdqu8	xmm17{k1}{z},[r11*1+r9]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (not yet reduced)
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	xmm0,xmm0,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	xmm0,xmm0,xmm31
	vaesenclast	xmm0,xmm0,xmm30
	; Output = AES result XOR source data
	vpxorq	xmm0,xmm0,xmm17
	; xmm11 = unmasked output block
	vextracti32x4	xmm11,zmm0,0
	; Destination base pointer is read from [rbp+120]; store is k1-masked
	mov	r10,QWORD[120+rbp]
	vmovdqu8	XMMWORD[r11*1+r10]{k1},xmm0
	; Zero the bytes outside k1, then byte-shuffle with xmm29 for hashing
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	xmm17,xmm0,xmm29
	; xmm7 = shuffled block
	vextracti32x4	xmm7,zmm17,0


	; Fewer than 16 bytes: take the partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_286





	; Full block: store 0 at [r8], multiply the block by the table entry
	; at rdx+336 and add the sums from zmm24/zmm25/zmm26
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Combine cross terms, split them over the high/low halves, fold lanes
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_286
; Partial single block (r13 < 16): record its length at [r8] and save xmm11
; at rdx+16.  The block is not multiplied here; only the sums carried in
; zmm24 (high), zmm25 (low) and zmm26 (middle) are reduced with POLY2, and
; the shuffled partial block xmm7 is then XORed into the result.
; NOTE(review): rdx+16 is presumably the context slot holding the last
; output block for later completion - confirm against the context layout.
$L$_small_initial_partial_block_286:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11


	; Split the middle term over the high/low halves, then fold the lanes
	vpsrldq	zmm0,zmm26,8
	vpslldq	zmm3,zmm26,8
	vpxorq	zmm24,zmm24,zmm0
	vpxorq	zmm25,zmm25,zmm3
	vextracti64x4	ymm0,zmm24,1
	vpxorq	ymm24,ymm24,ymm0
	vextracti32x4	xmm0,ymm24,1
	vpxorq	xmm24,xmm24,xmm0
	vextracti64x4	ymm3,zmm25,1
	vpxorq	ymm25,ymm25,ymm3
	vextracti32x4	xmm3,ymm25,1
	vpxorq	xmm25,xmm25,xmm3
	vmovdqa64	xmm0,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm3,xmm0,xmm25,0x01
	vpslldq	xmm3,xmm3,8
	vpxorq	xmm3,xmm25,xmm3


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm4,xmm0,xmm3,0x00
	vpsrldq	xmm4,xmm4,4
	vpclmulqdq	xmm14,xmm0,xmm3,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm4,xmm24,0x96












	; XOR the shuffled partial block into the hash
	vpxorq	xmm14,xmm14,xmm7

	jmp	NEAR $L$_after_reduction_286
	; The full-block path joins here; it has already hashed the block, so
	; no extra XOR with xmm7 is applied.
$L$_small_initial_compute_done_286:
$L$_after_reduction_286:
	jmp	NEAR $L$_last_blocks_done_284
; Tail case for two remaining blocks: load byte mask k1 and build two
; counter blocks in ymm0 from zmm2.
$L$_last_num_blocks_is_2_284:
	; k1 = 8-byte entry r13 of byte64_len_to_mask_table
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d >= 254 takes the slow path below.
	; NOTE(review): presumably r15d tracks the low counter byte and
	; 254 = 256 - 2 detects an 8-bit wrap within these blocks - confirm.
	cmp	r15d,254
	jae	NEAR $L$_16_blocks_overflow_287
	; Fast path: add ymm28 directly
	vpaddd	ymm0,ymm2,ymm28
	jmp	NEAR $L$_16_blocks_ok_287

$L$_16_blocks_overflow_287:
	; Slow path: shuffle zmm2 with zmm29, add ddq_add_1234, shuffle the
	; two low blocks back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	ymm0,ymm0,ymm29
; Encrypt two counter blocks (ymm0) while, interleaved with the AES rounds,
; carry-less multiplying four stack vectors at [768..960+rsp] by the four
; 64-byte vectors starting at rsp+rbx.  The running hash xmm14 is XORed
; into the first stack vector before multiplying.  The un-reduced sums are
; left in zmm24 (high), zmm25 (low) and zmm26 (middle).
; 13 round keys are read from rcx+0 .. rcx+192: one whitening XOR, eleven
; vaesenc rounds and one vaesenclast.
; NOTE(review): the stack vectors are presumably earlier byte-reflected
; blocks and matching hash-key powers selected through rbx - confirm.
$L$_16_blocks_ok_287:




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 1 of zmm0 replicated to all four lanes
	vextracti32x4	xmm2,zmm0,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; XOR with round key 0
	vpxorq	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; First pair: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Second pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Third pair
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 = three-way XOR: sum the partial products
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the source blocks from r9+r11, k1-masked and zero-filled
	vmovdqu8	ymm17{k1}{z},[r11*1+r9]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Fourth pair
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final sums: zmm24 = high, zmm25 = low, zmm26 = middle (not yet reduced)
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	ymm0,ymm0,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	ymm0,ymm0,ymm31
	vaesenclast	ymm0,ymm0,ymm30
	; Output = AES result XOR source data
	vpxorq	ymm0,ymm0,ymm17
	; xmm11 = unmasked last output block (lane 1)
	vextracti32x4	xmm11,zmm0,1
	; Destination base pointer is read from [rbp+120]; store is k1-masked
	mov	r10,QWORD[120+rbp]
	vmovdqu8	YMMWORD[r11*1+r10]{k1},ymm0
	; Zero the bytes outside k1, then byte-shuffle with ymm29 for hashing
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	ymm17,ymm0,ymm29
	; xmm7 = shuffled last block (lane 1)
	vextracti32x4	xmm7,zmm17,1
	; r13 = byte count left for the last block
	sub	r13,16 * (2 - 1)


	; Fewer than 16 bytes in the last block: take the partial-block path
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_288





	; Full last block: store 0 at [r8], multiply both blocks by the two
	; table entries at rdx+320 and add the sums from zmm24/zmm25/zmm26
	sub	r13,16
	mov	QWORD[r8],0
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; Combine cross terms, split them over the high/low halves, fold lanes
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; Reduction with POLY2, first step
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Reduction, second step; xmm14 = new hash value
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_288
$L$_small_initial_partial_block_288:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm17,xmm1,0x01
	vpclmulqdq	xmm5,xmm17,xmm1,0x10
	vpclmulqdq	xmm0,xmm17,xmm1,0x11
	vpclmulqdq	xmm3,xmm17,xmm1,0x00
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_288:

	or	r13,r13
	je	NEAR $L$_after_reduction_288
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_288:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 3 trailing 16-byte blocks (the last one may be short).
; Flow: build byte mask k1 from r13, derive counter blocks from zmm2, run them
; through the 13 round keys at [rcx]..[192+rcx] (same schedule layout as
; $L$aes_192_0), XOR with the input at [r9+r11] and store through the output
; pointer loaded from [120+rbp]. Interleaved with the AES rounds, four 64-byte
; groups at [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm28, zmm29 and the data behind rdx/rsp are set
; up outside this block; any role named below is inferred from usage only and
; should be confirmed against the function prologue.
$L$_last_num_blocks_is_3_284:
	; k1 = byte64_len_to_mask_table[r13] (8-byte table entries)
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d >= 253 (= 256 - 3) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,253
	jae	NEAR $L$_16_blocks_overflow_289
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_289

$L$_16_blocks_overflow_289:
	; shuffle zmm2 through zmm29, add ddq_add_1234, shuffle the sum back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_289:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 2 of zmm0 (third counter block) replicated to every lane
	vextracti32x4	xmm2,zmm0,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked load of the input; bytes not selected by k1 read as zero
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = lane 2 of the output, captured before the bytes outside k1 are cleared
	vextracti32x4	xmm11,zmm0,2
	mov	r10,QWORD[120+rbp]
	; masked store: only the bytes selected by k1 are written to [r10+r11]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; clear the bytes outside k1, then byte-shuffle the result through zmm29
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = lane 2 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm17,2
	; r13 = byte count left for the third block
	sub	r13,16 * (3 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_290





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; zmm1 lanes 0..2 = the 16-byte entries at [304+rdx], [320+rdx], [336+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	; multiply the shuffled output blocks (zmm17) by zmm1
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	; add the sums accumulated during the AES rounds
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the combined cross terms (zmm4) by 8 bytes per lane and add the
	; upper part to the high sum (zmm0) and the lower part to the low sum (zmm3)
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_290
; Partial-last-block path of the 3-block tail, entered when r13 < 16 after the
; first two blocks were subtracted. Writes r13 to [r8] and xmm11 to [16+rdx],
; then multiplies only lanes 0-1 of the shuffled output (ymm17) by the two
; 16-byte entries at [320+rdx] and [336+rdx], adds the sums held in
; zmm24/zmm25/zmm26 and reduces the total into xmm14. Lane 2 (the short block)
; is not multiplied here. Falls through to $L$_small_initial_compute_done_290.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_290:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	; 256-bit multiplies: only the first two blocks take part
	vpclmulqdq	ymm4,ymm17,ymm1,0x01
	vpclmulqdq	ymm5,ymm17,ymm1,0x10
	vpclmulqdq	ymm0,ymm17,ymm1,0x11
	vpclmulqdq	ymm3,ymm17,ymm1,0x00
	; add the previously accumulated sums (cross, high, low)
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the combined cross terms by 8 bytes per lane into the high/low sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 3-block tail. If r13 is non-zero, the byte-shuffled last
; block held in xmm7 is XORed into xmm14; if r13 is zero the XOR is skipped.
; Control then continues at $L$_last_blocks_done_284.
$L$_small_initial_compute_done_290:

	or	r13,r13			; sets ZF when r13 == 0 (r13 itself is unchanged)
	je	NEAR $L$_after_reduction_290
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_290:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 4 trailing 16-byte blocks (the last one may be short).
; Flow: build byte mask k1 from r13, derive counter blocks from zmm2, run them
; through the 13 round keys at [rcx]..[192+rcx] (same schedule layout as
; $L$aes_192_0), XOR with the input at [r9+r11] and store through the output
; pointer loaded from [120+rbp]. Interleaved with the AES rounds, four 64-byte
; groups at [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm28, zmm29 and the data behind rdx/rsp are set
; up outside this block; any role named below is inferred from usage only and
; should be confirmed against the function prologue.
$L$_last_num_blocks_is_4_284:
	; k1 = byte64_len_to_mask_table[r13] (8-byte table entries)
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	kmovq	k1,[rax*8+r10]
	; r15d >= 252 (= 256 - 4) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,252
	jae	NEAR $L$_16_blocks_overflow_291
	vpaddd	zmm0,zmm2,zmm28
	jmp	NEAR $L$_16_blocks_ok_291

$L$_16_blocks_overflow_291:
	; shuffle zmm2 through zmm29, add ddq_add_1234, shuffle the sum back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vpshufb	zmm0,zmm0,zmm29
$L$_16_blocks_ok_291:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 3 of zmm0 (fourth counter block) replicated to every lane
	vextracti32x4	xmm2,zmm0,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; masked load of the input; bytes not selected by k1 read as zero
	vmovdqu8	zmm17{k1}{z},[r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	; xmm11 = lane 3 of the output, captured before the bytes outside k1 are cleared
	vextracti32x4	xmm11,zmm0,3
	mov	r10,QWORD[120+rbp]
	; masked store: only the bytes selected by k1 are written to [r10+r11]
	vmovdqu8	ZMMWORD[r11*1+r10]{k1},zmm0
	; clear the bytes outside k1, then byte-shuffle the result through zmm29
	vmovdqu8	zmm0{k1}{z},zmm0
	vpshufb	zmm17,zmm0,zmm29
	; xmm7 = lane 3 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm17,3
	; r13 = byte count left for the fourth block
	sub	r13,16 * (4 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_292





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; zmm1 = the four 16-byte entries at [288+rdx]..[336+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	; multiply the shuffled output blocks (zmm17) by zmm1
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; add the sums accumulated during the AES rounds
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the combined cross terms (zmm30) by 8 bytes per lane; zmm0 = high
	; sum plus upper part, zmm3 = low sum plus lower part
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_292
; Partial-last-block path of the 4-block tail, entered when r13 < 16 after the
; first three blocks were subtracted. Writes r13 to [r8] and xmm11 to [16+rdx],
; then multiplies the shuffled output (zmm17) by the three 16-byte entries at
; [304+rdx], [320+rdx] and [336+rdx] placed in lanes 0-2 of zmm1, adds the sums
; held in zmm24/zmm25/zmm26 and reduces the total into xmm14.
; Falls through to $L$_small_initial_compute_done_292.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_292:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	; add the previously accumulated sums (cross, high, low)
	vpxorq	zmm4,zmm4,zmm26
	vpxorq	zmm0,zmm0,zmm24
	vpxorq	zmm3,zmm3,zmm25

	; split the combined cross terms by 8 bytes per lane into the high/low sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 4-block tail. If r13 is non-zero, the byte-shuffled last
; block held in xmm7 is XORed into xmm14; if r13 is zero the XOR is skipped.
; Control then continues at $L$_last_blocks_done_284.
$L$_small_initial_compute_done_292:

	or	r13,r13			; sets ZF when r13 == 0 (r13 itself is unchanged)
	je	NEAR $L$_after_reduction_292
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_292:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 5 trailing 16-byte blocks (the last one may be short).
; Blocks 1-4 live in zmm0 and block 5 in xmm3. Flow: build byte mask k1 from
; r13-64, derive counter blocks from zmm2, run them through the 13 round keys
; at [rcx]..[192+rcx] (same schedule layout as $L$aes_192_0), XOR with the
; input at [r9+r11] and store through the output pointer loaded from
; [120+rbp]. Interleaved with the AES rounds, four 64-byte groups at
; [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm27, zmm28, zmm29 and the data behind rdx/rsp
; are set up outside this block; any role named below is inferred from usage
; only and should be confirmed against the function prologue.
$L$_last_num_blocks_is_5_284:
	; k1 = byte64_len_to_mask_table[r13 - 64]: mask for the second 64-byte chunk
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 251 (= 256 - 5) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,251
	jae	NEAR $L$_16_blocks_overflow_293
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	xmm3,xmm0,xmm27
	jmp	NEAR $L$_16_blocks_ok_293

$L$_16_blocks_overflow_293:
	; shuffle zmm2 through zmm29, add ddq_add_1234 (and ddq_add_4444 for the
	; second register), shuffle both sums back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	xmm3,xmm3,xmm29
$L$_16_blocks_ok_293:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 0 of zmm3 (fifth counter block) replicated to every lane
	vextracti32x4	xmm2,zmm3,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; first 64 input bytes loaded whole; the fifth block is a masked load
	; (bytes not selected by k1 read as zero)
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	xmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	xmm3,xmm3,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	xmm3,xmm3,xmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	xmm3,xmm3,xmm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	xmm3,xmm3,xmm19
	; xmm11 = lane 0 of zmm3 (fifth output block), captured before masking
	vextracti32x4	xmm11,zmm3,0
	mov	r10,QWORD[120+rbp]
	; first 64 bytes stored whole; the fifth block is a masked store under k1
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	XMMWORD[64+r11*1+r10]{k1},xmm3
	; clear the bytes of zmm3 outside k1, then byte-shuffle both outputs through zmm29
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	xmm19,xmm3,xmm29
	; xmm7 = lane 0 of zmm19 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm19,0
	; r13 = byte count left for the fifth block
	sub	r13,16 * (5 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_294





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; blocks 1-4 (zmm17) x the four 16-byte entries at [272+rdx]..[320+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; block 5 (xmm19) x the 16-byte entry at [336+rdx]
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; merge both product sets with the sums accumulated during the AES rounds
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms (zmm4) by 8 bytes per lane and add the
	; upper part to the high sum (zmm0) and the lower part to the low sum (zmm3)
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_294
; Partial-last-block path of the 5-block tail, entered when r13 < 16 after the
; first four blocks were subtracted. Writes r13 to [r8] and xmm11 to [16+rdx],
; then multiplies only the first four shuffled output blocks (zmm17) by the
; four 16-byte entries at [288+rdx]..[336+rdx], adds the sums held in
; zmm24/zmm25/zmm26 and reduces the total into xmm14. The fifth block (xmm19)
; is not multiplied here. Falls through to $L$_small_initial_compute_done_294.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_294:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10

	; add the previously accumulated sums (cross, high, low)
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the combined cross terms (zmm30) by 8 bytes per lane; zmm0 = high
	; sum plus upper part, zmm3 = low sum plus lower part
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 5-block tail. If r13 is non-zero, the byte-shuffled last
; block held in xmm7 is XORed into xmm14; if r13 is zero the XOR is skipped.
; Control then continues at $L$_last_blocks_done_284.
$L$_small_initial_compute_done_294:

	or	r13,r13			; sets ZF when r13 == 0 (r13 itself is unchanged)
	je	NEAR $L$_after_reduction_294
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_294:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 6 trailing 16-byte blocks (the last one may be short).
; Blocks 1-4 live in zmm0 and blocks 5-6 in ymm3. Flow: build byte mask k1
; from r13-64, derive counter blocks from zmm2, run them through the 13 round
; keys at [rcx]..[192+rcx] (same schedule layout as $L$aes_192_0), XOR with
; the input at [r9+r11] and store through the output pointer loaded from
; [120+rbp]. Interleaved with the AES rounds, four 64-byte groups at
; [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm27, zmm28, zmm29 and the data behind rdx/rsp
; are set up outside this block; any role named below is inferred from usage
; only and should be confirmed against the function prologue.
$L$_last_num_blocks_is_6_284:
	; k1 = byte64_len_to_mask_table[r13 - 64]: mask for the second 64-byte chunk
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 250 (= 256 - 6) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,250
	jae	NEAR $L$_16_blocks_overflow_295
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	ymm3,ymm0,ymm27
	jmp	NEAR $L$_16_blocks_ok_295

$L$_16_blocks_overflow_295:
	; shuffle zmm2 through zmm29, add ddq_add_1234 (and ddq_add_4444 for the
	; second register), shuffle both sums back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	ymm3,ymm3,ymm29
$L$_16_blocks_ok_295:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 1 of zmm3 (sixth counter block) replicated to every lane
	vextracti32x4	xmm2,zmm3,1
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; first 64 input bytes loaded whole; blocks 5-6 are a masked load
	; (bytes not selected by k1 read as zero)
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	ymm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	ymm3,ymm3,ymm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	ymm3,ymm3,ymm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	ymm3,ymm3,ymm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	ymm3,ymm3,ymm19
	; xmm11 = lane 1 of zmm3 (sixth output block), captured before masking
	vextracti32x4	xmm11,zmm3,1
	mov	r10,QWORD[120+rbp]
	; first 64 bytes stored whole; blocks 5-6 are a masked store under k1
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	YMMWORD[64+r11*1+r10]{k1},ymm3
	; clear the bytes of zmm3 outside k1, then byte-shuffle both outputs through zmm29
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	ymm19,ymm3,ymm29
	; xmm7 = lane 1 of zmm19 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm19,1
	; r13 = byte count left for the sixth block
	sub	r13,16 * (6 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_296





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; blocks 1-4 (zmm17) x the four 16-byte entries at [256+rdx]..[304+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; blocks 5-6 (ymm19) x the two 16-byte entries at [320+rdx] and [336+rdx]
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; merge both product sets with the sums accumulated during the AES rounds
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms (zmm4) by 8 bytes per lane and add the
	; upper part to the high sum (zmm0) and the lower part to the low sum (zmm3)
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_296
; Partial-last-block path of the 6-block tail, entered when r13 < 16 after the
; first five blocks were subtracted. Writes r13 to [r8] and xmm11 to [16+rdx],
; then multiplies shuffled blocks 1-4 (zmm17) by the four 16-byte entries at
; [272+rdx]..[320+rdx] and block 5 (xmm19) by the entry at [336+rdx], adds the
; sums held in zmm24/zmm25/zmm26 and reduces the total into xmm14. The sixth
; block (lane 1 of zmm19) is not multiplied here.
; Falls through to $L$_small_initial_compute_done_296.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_296:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm19,xmm1,0x01
	vpclmulqdq	xmm5,xmm19,xmm1,0x10
	vpclmulqdq	xmm0,xmm19,xmm1,0x11
	vpclmulqdq	xmm3,xmm19,xmm1,0x00

	; merge both product sets with the previously accumulated sums
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms by 8 bytes per lane into the high/low sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 6-block tail. If r13 is non-zero, the byte-shuffled last
; block held in xmm7 is XORed into xmm14; if r13 is zero the XOR is skipped.
; Control then continues at $L$_last_blocks_done_284.
$L$_small_initial_compute_done_296:

	or	r13,r13			; sets ZF when r13 == 0 (r13 itself is unchanged)
	je	NEAR $L$_after_reduction_296
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_296:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 7 trailing 16-byte blocks (the last one may be short).
; Blocks 1-4 live in zmm0 and blocks 5-7 in zmm3. Flow: build byte mask k1
; from r13-64, derive counter blocks from zmm2, run them through the 13 round
; keys at [rcx]..[192+rcx] (same schedule layout as $L$aes_192_0), XOR with
; the input at [r9+r11] and store through the output pointer loaded from
; [120+rbp]. Interleaved with the AES rounds, four 64-byte groups at
; [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm27, zmm28, zmm29 and the data behind rdx/rsp
; are set up outside this block; any role named below is inferred from usage
; only and should be confirmed against the function prologue.
$L$_last_num_blocks_is_7_284:
	; k1 = byte64_len_to_mask_table[r13 - 64]: mask for the second 64-byte chunk
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 249 (= 256 - 7) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,249
	jae	NEAR $L$_16_blocks_overflow_297
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_297

$L$_16_blocks_overflow_297:
	; shuffle zmm2 through zmm29, add ddq_add_1234 (and ddq_add_4444 for the
	; second register), shuffle both sums back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_297:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 2 of zmm3 (seventh counter block) replicated to every lane
	vextracti32x4	xmm2,zmm3,2
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; first 64 input bytes loaded whole; blocks 5-7 are a masked load
	; (bytes not selected by k1 read as zero)
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = lane 2 of zmm3 (seventh output block), captured before masking
	vextracti32x4	xmm11,zmm3,2
	mov	r10,QWORD[120+rbp]
	; first 64 bytes stored whole; blocks 5-7 are a masked store under k1
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; clear the bytes of zmm3 outside k1, then byte-shuffle both outputs through zmm29
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	; xmm7 = lane 2 of zmm19 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm19,2
	; r13 = byte count left for the seventh block
	sub	r13,16 * (7 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_298





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; blocks 1-4 (zmm17) x the four 16-byte entries at [240+rdx]..[288+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	; blocks 5-7 (zmm19) x the entries at [304+rdx], [320+rdx], [336+rdx] in lanes 0-2
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; merge both product sets with the sums accumulated during the AES rounds
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms (zmm4) by 8 bytes per lane and add the
	; upper part to the high sum (zmm0) and the lower part to the low sum (zmm3)
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_298
; Partial-last-block path of the 7-block tail, entered when r13 < 16 after the
; first six blocks were subtracted. Writes r13 to [r8] and xmm11 to [16+rdx],
; then multiplies shuffled blocks 1-4 (zmm17) by the four 16-byte entries at
; [256+rdx]..[304+rdx] and blocks 5-6 (ymm19) by the entries at [320+rdx] and
; [336+rdx], adds the sums held in zmm24/zmm25/zmm26 and reduces the total
; into xmm14. The seventh block (lane 2 of zmm19) is not multiplied here.
; Falls through to $L$_small_initial_compute_done_298.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_298:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[256+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[320+rdx]
	vpclmulqdq	ymm4,ymm19,ymm1,0x01
	vpclmulqdq	ymm5,ymm19,ymm1,0x10
	vpclmulqdq	ymm0,ymm19,ymm1,0x11
	vpclmulqdq	ymm3,ymm19,ymm1,0x00

	; merge both product sets with the previously accumulated sums
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms by 8 bytes per lane into the high/low sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

; Common exit of the 7-block tail. If r13 is non-zero, the byte-shuffled last
; block held in xmm7 is XORed into xmm14; if r13 is zero the XOR is skipped.
; Control then continues at $L$_last_blocks_done_284.
$L$_small_initial_compute_done_298:

	or	r13,r13			; sets ZF when r13 == 0 (r13 itself is unchanged)
	je	NEAR $L$_after_reduction_298
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_298:
	jmp	NEAR $L$_last_blocks_done_284
; Tail handler for 8 trailing 16-byte blocks (the last one may be short).
; Blocks 1-4 live in zmm0 and blocks 5-8 in zmm3. Flow: build byte mask k1
; from r13-64, derive counter blocks from zmm2, run them through the 13 round
; keys at [rcx]..[192+rcx] (same schedule layout as $L$aes_192_0), XOR with
; the input at [r9+r11] and store through the output pointer loaded from
; [120+rbp]. Interleaved with the AES rounds, four 64-byte groups at
; [768..960+rsp] are carry-less multiplied by the 64-byte groups at
; [rbx+rsp]..[192+rbx+rsp]; the sums land in zmm24 (0x11 products), zmm25
; (0x00 products) and zmm26 (0x01/0x10 cross products).
; NOTE(review): r15d, zmm14, zmm27, zmm28, zmm29 and the data behind rdx/rsp
; are set up outside this block; any role named below is inferred from usage
; only and should be confirmed against the function prologue.
$L$_last_num_blocks_is_8_284:
	; k1 = byte64_len_to_mask_table[r13 - 64]: mask for the second 64-byte chunk
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,64
	kmovq	k1,[rax*8+r10]
	; r15d >= 248 (= 256 - 8) selects the path that byte-shuffles before adding.
	; NOTE(review): presumably guards a carry out of the low counter byte - confirm.
	cmp	r15d,248
	jae	NEAR $L$_16_blocks_overflow_299
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	jmp	NEAR $L$_16_blocks_ok_299

$L$_16_blocks_overflow_299:
	; shuffle zmm2 through zmm29, add ddq_add_1234 (and ddq_add_4444 for the
	; second register), shuffle both sums back
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
$L$_16_blocks_ok_299:




	; round key 0 is broadcast to all four 128-bit lanes; later keys alternate
	; between zmm30 and zmm31
	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; zmm8 = zmm14 XOR the 64 bytes at [768+rsp]
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; zmm2 = lane 3 of zmm3 (eighth counter block) replicated to every lane
	vextracti32x4	xmm2,zmm3,3
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; initial AddRoundKey with the key at [rcx]
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; group 1: zmm8 x zmm1 -> zmm14 (0x11), zmm7 (0x00), zmm10 (0x01), zmm11 (0x10)
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; group 2: zmm22 x zmm18
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; group 3: zmm8 x zmm1 (reloaded from [896+rsp] and [128+rbx+rsp])
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; imm 0x96 is a three-way XOR: fold groups 1-3 together per product type
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; first 64 input bytes loaded whole; blocks 5-8 are a masked load
	; (bytes not selected by k1 read as zero)
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19{k1}{z},[64+r11*1+r9]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; group 4: zmm22 x zmm18 (reloaded from [960+rsp] and [192+rbx+rsp])
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; final sums over the four groups: zmm24 = 0x11, zmm25 = 0x00, zmm26 = cross terms
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	; output = encrypted counter blocks XOR input
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	; xmm11 = lane 3 of zmm3 (eighth output block), captured before masking
	vextracti32x4	xmm11,zmm3,3
	mov	r10,QWORD[120+rbp]
	; first 64 bytes stored whole; blocks 5-8 are a masked store under k1
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10]{k1},zmm3
	; clear the bytes of zmm3 outside k1, then byte-shuffle both outputs through zmm29
	vmovdqu8	zmm3{k1}{z},zmm3
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	; xmm7 = lane 3 of zmm19 (the last block) of the shuffled output
	vextracti32x4	xmm7,zmm19,3
	; r13 = byte count left for the eighth block
	sub	r13,16 * (8 - 1)


	; fewer than 16 bytes left -> the last block is partial
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_300





	; full last block: r13 -= 16 and 0 is written to [r8]
	sub	r13,16
	mov	QWORD[r8],0
	; blocks 1-4 (zmm17) x the four 16-byte entries at [224+rdx]..[272+rdx]
	; NOTE(review): presumably precomputed hash-key powers - confirm the table layout.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; blocks 5-8 (zmm19) x the four 16-byte entries at [288+rdx]..[336+rdx]
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	; combine the two product sets per product type
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; add the sums accumulated during the AES rounds
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; split the combined cross terms (zmm30) by 8 bytes per lane; zmm0 = high
	; sum plus upper part, zmm3 = low sum plus lower part
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) to 128 bits with the constant at POLY2; result in xmm14
	; NOTE(review): POLY2 is presumably the GHASH reduction constant - confirm at its definition.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_300
; Partial-last-block path of the 8-block tail, entered when r13 < 16 after the
; first seven blocks were subtracted. Writes r13 to [r8] and xmm11 to
; [16+rdx], then multiplies shuffled blocks 1-4 (zmm17) by the four 16-byte
; entries at [240+rdx]..[288+rdx] and zmm19 by the entries at [304+rdx],
; [320+rdx] and [336+rdx] placed in lanes 0-2 of zmm1, adds the sums held in
; zmm24/zmm25/zmm26 and reduces the total into xmm14.
; Falls through to $L$_small_initial_compute_done_300.
; NOTE(review): [r8] presumably receives the partial-block length and [16+rdx]
; the saved last output block - confirm against the context structure.
$L$_small_initial_partial_block_300:








	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	vmovdqu64	zmm1,ZMMWORD[240+rdx]
	vpclmulqdq	zmm8,zmm17,zmm1,0x11
	vpclmulqdq	zmm22,zmm17,zmm1,0x00
	vpclmulqdq	zmm30,zmm17,zmm1,0x01
	vpclmulqdq	zmm31,zmm17,zmm1,0x10
	vmovdqu64	ymm1,YMMWORD[304+rdx]
	vinserti64x2	zmm1,zmm1,ZMMWORD[336+rdx],2
	vpclmulqdq	zmm4,zmm19,zmm1,0x01
	vpclmulqdq	zmm5,zmm19,zmm1,0x10
	vpclmulqdq	zmm0,zmm19,zmm1,0x11
	vpclmulqdq	zmm3,zmm19,zmm1,0x00

	; merge both product sets with the previously accumulated sums
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; split the combined cross terms by 8 bytes per lane into the high/low sums
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; XOR the four 128-bit lanes of zmm0 into xmm0, and of zmm3 into xmm3
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; reduce xmm0 (high) : xmm3 (low) with the constant at POLY2; result in xmm14
	vmovdqa64	xmm1,XMMWORD[POLY2]


	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_300:
; Join point of the full-block and partial-block paths of the 8-block case.
; If bytes are still outstanding (r13 != 0), XOR xmm7 into xmm14. xmm7 is
; the byte-reflected, zero-padded last block (lane 3 of zmm19).

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_300
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_300:
	jmp	NEAR $L$_last_blocks_done_284
$L$_last_num_blocks_is_9_284:
; Handler for a tail of nine blocks: two full 64-byte vectors plus one
; 16-byte lane that may be partial.
	; k1 = byte mask for the ninth block, looked up by (r13 - 128) in a table
	; of 8-byte entries.
	lea	r10,[byte64_len_to_mask_table]
	mov	rax,r13
	sub	rax,128
	kmovq	k1,[rax*8+r10]
	; NOTE(review): r15d presumably mirrors the low byte of the counter;
	; 247 = 256 - 9, so at or above it adding nine increments would carry out
	; of that byte and the slower path is used - confirm.
	cmp	r15d,247
	jae	NEAR $L$_16_blocks_overflow_301
	; Fast path: derive the nine blocks from zmm2 with plain dword adds.
	; NOTE(review): zmm28/zmm27 are set outside this chunk; presumably
	; per-lane increment constants - confirm.
	vpaddd	zmm0,zmm2,zmm28
	vpaddd	zmm3,zmm0,zmm27
	vpaddd	xmm4,xmm3,xmm27
	jmp	NEAR $L$_16_blocks_ok_301

$L$_16_blocks_overflow_301:
; Slow path for the nine-block case: byte-shuffle zmm2 with the zmm29 mask,
; do the dword additions in that byte order, then shuffle the results back
; with the same mask.
; NOTE(review): ddq_add_1234 / ddq_add_4444 are defined elsewhere; by their
; names they add 1..4 and 4 to successive lanes - confirm.
	vpshufb	zmm2,zmm2,zmm29
	vpaddd	zmm0,zmm2,ZMMWORD[ddq_add_1234]
	vmovdqa64	zmm5,ZMMWORD[ddq_add_4444]
	vpaddd	zmm3,zmm0,zmm5
	vpaddd	zmm4,zmm3,zmm5
	vpshufb	zmm0,zmm0,zmm29
	vpshufb	zmm3,zmm3,zmm29
	; Only lane 0 of zmm4 is needed (the ninth block); the xmm write clears
	; the upper lanes.
	vpshufb	xmm4,xmm4,xmm29
$L$_16_blocks_ok_301:
; Runs AES over nine blocks (zmm0, zmm3, low lane of zmm4) while interleaving
; carry-less multiplies over 256 bytes read from the stack, then XORs the
; keystream with the input at r9+r11, stores to the output, and hashes the
; result. Round keys are broadcast from rcx at offsets 0..192: one initial
; XOR, eleven vaesenc rounds, one vaesenclast (a 13-round-key schedule).
; The instruction order interleaves AES and multiply work; do not reorder.
; NOTE(review): rsp+768.. presumably holds earlier byte-reflected blocks and
; rsp+rbx.. the matching hash-key powers - confirm against the code that
; fills these areas.




	vbroadcastf64x2	zmm30,ZMMWORD[rcx]
	; Fold the running value xmm14 into the first 64 bytes of stacked data.
	vpxorq	zmm8,zmm14,ZMMWORD[768+rsp]
	vmovdqu64	zmm1,ZMMWORD[rbx*1+rsp]
	; Copy the last block just generated (lane 0 of zmm4) to every lane of zmm2.
	vextracti32x4	xmm2,zmm4,0
	vshufi64x2	zmm2,zmm2,zmm2,0


	vbroadcastf64x2	zmm31,ZMMWORD[16+rcx]
	vmovdqu64	zmm18,ZMMWORD[64+rbx*1+rsp]
	; vmovdqa64 requires rsp+832 (and +896/+960 below) to be 64-byte aligned.
	vmovdqa64	zmm22,ZMMWORD[832+rsp]
	; Initial round-key XOR (key at rcx+0).
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm30
	vpxorq	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[32+rcx]


	; Multiply group 1: zmm14 = high, zmm7 = low, zmm10/zmm11 = cross terms.
	vpclmulqdq	zmm14,zmm8,zmm1,0x11
	vpclmulqdq	zmm7,zmm8,zmm1,0x00
	vpclmulqdq	zmm10,zmm8,zmm1,0x01
	vpclmulqdq	zmm11,zmm8,zmm1,0x10
	vmovdqu64	zmm1,ZMMWORD[128+rbx*1+rsp]
	vmovdqa64	zmm8,ZMMWORD[896+rsp]
	; AES round with key at rcx+16.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[48+rcx]


	; Multiply group 2 (data at rsp+832).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	vmovdqu64	zmm18,ZMMWORD[192+rbx*1+rsp]
	vmovdqa64	zmm22,ZMMWORD[960+rsp]
	; AES round with key at rcx+32.
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[64+rcx]


	; Multiply group 3 (data at rsp+896).
	vpclmulqdq	zmm20,zmm8,zmm1,0x10
	vpclmulqdq	zmm21,zmm8,zmm1,0x01
	vpclmulqdq	zmm17,zmm8,zmm1,0x11
	vpclmulqdq	zmm19,zmm8,zmm1,0x00
	; AES round with key at rcx+48.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[80+rcx]


	; Three-way XOR (imm 0x96) of groups 1..3, term by term.
	vpternlogq	zmm14,zmm12,zmm17,0x96
	vpternlogq	zmm7,zmm13,zmm19,0x96
	vpternlogq	zmm11,zmm16,zmm21,0x96
	vpternlogq	zmm10,zmm15,zmm20,0x96
	; AES round with key at rcx+64.
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[96+rcx]
	; Load the input: two full vectors, plus the ninth block under mask k1
	; with zeroing, so bytes past the end are not read into xmm20.
	vmovdqu8	zmm17,ZMMWORD[r11*1+r9]
	vmovdqu8	zmm19,ZMMWORD[64+r11*1+r9]
	vmovdqu8	xmm20{k1}{z},[128+r11*1+r9]
	; AES round with key at rcx+80.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[112+rcx]


	; Multiply group 4 (data at rsp+960).
	vpclmulqdq	zmm15,zmm22,zmm18,0x10
	vpclmulqdq	zmm16,zmm22,zmm18,0x01
	vpclmulqdq	zmm12,zmm22,zmm18,0x11
	vpclmulqdq	zmm13,zmm22,zmm18,0x00
	; AES round with key at rcx+96.
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[128+rcx]
	; Final per-lane sums, kept unreduced for the tail code:
	; zmm24 = high terms, zmm25 = low terms, zmm26 = cross terms.
	vpternlogq	zmm10,zmm11,zmm16,0x96
	vpxorq	zmm24,zmm14,zmm12
	vpxorq	zmm25,zmm7,zmm13
	vpxorq	zmm26,zmm10,zmm15
	; Remaining AES rounds with keys at rcx+112 .. rcx+176.
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[144+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[160+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	vbroadcastf64x2	zmm31,ZMMWORD[176+rcx]
	vaesenc	zmm0,zmm0,zmm30
	vaesenc	zmm3,zmm3,zmm30
	vaesenc	xmm4,xmm4,xmm30
	vbroadcastf64x2	zmm30,ZMMWORD[192+rcx]
	vaesenc	zmm0,zmm0,zmm31
	vaesenc	zmm3,zmm3,zmm31
	vaesenc	xmm4,xmm4,xmm31
	; Last round with key at rcx+192.
	vaesenclast	zmm0,zmm0,zmm30
	vaesenclast	zmm3,zmm3,zmm30
	vaesenclast	xmm4,xmm4,xmm30
	; XOR the keystream with the input.
	vpxorq	zmm0,zmm0,zmm17
	vpxorq	zmm3,zmm3,zmm19
	vpxorq	xmm4,xmm4,xmm20
	; Keep the unmasked ninth output block in xmm11; the partial-block path
	; later saves it at rdx+16.
	vextracti32x4	xmm11,zmm4,0
	; NOTE(review): [rbp+120] presumably holds the output pointer passed on
	; the stack - confirm against the prologue.
	mov	r10,QWORD[120+rbp]
	vmovdqu8	ZMMWORD[r11*1+r10],zmm0
	vmovdqu8	ZMMWORD[64+r11*1+r10],zmm3
	; Store only the valid bytes of the ninth block.
	vmovdqu8	XMMWORD[128+r11*1+r10]{k1},xmm4
	; Zero the bytes outside k1 so the hash sees a zero-padded block.
	vmovdqu8	zmm4{k1}{z},zmm4
	; Byte-reflect the output blocks (zmm29 shuffle mask) for hashing.
	vpshufb	zmm17,zmm0,zmm29
	vpshufb	zmm19,zmm3,zmm29
	vpshufb	xmm20,xmm4,xmm29
	; xmm7 = reflected, zero-padded ninth block, used after the reduction.
	vextracti32x4	xmm7,zmm20,0
	; r13 = bytes belonging to the ninth block.
	sub	r13,16 * (9 - 1)


	; Fewer than 16 bytes (signed compare): take the partial-block path.
	cmp	r13,16
	jl	NEAR $L$_small_initial_partial_block_302





	; Ninth block is complete: write 0 to [r8] and hash all nine blocks.
	; NOTE(review): the entries read from rdx+208..rdx+351 are presumably
	; precomputed hash-key powers - confirm against the context layout.
	sub	r13,16
	mov	QWORD[r8],0
	; Blocks 1..4 times the four entries at rdx+208.
	vmovdqu64	zmm1,ZMMWORD[208+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5..8 times the four entries at rdx+272.
	vmovdqu64	zmm1,ZMMWORD[272+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31
	; Block 9 times the single entry at rdx+336. These xmm writes clear the
	; upper lanes of zmm0/zmm3/zmm4/zmm5.
	vmovdqu64	xmm1,XMMWORD[336+rdx]
	vpclmulqdq	xmm4,xmm20,xmm1,0x01
	vpclmulqdq	xmm5,xmm20,xmm1,0x10
	vpclmulqdq	xmm0,xmm20,xmm1,0x11
	vpclmulqdq	xmm3,xmm20,xmm1,0x00

	; Add the stacked-data sums (zmm24/zmm25/zmm26) to the new products.
	vpxorq	zmm4,zmm4,zmm30
	vpternlogq	zmm5,zmm26,zmm31,0x96
	vpternlogq	zmm0,zmm24,zmm8,0x96
	vpternlogq	zmm3,zmm25,zmm22,0x96

	; Split the cross terms into the high (zmm0) and low (zmm3) halves.
	vpxorq	zmm4,zmm4,zmm5
	vpsrldq	zmm30,zmm4,8
	vpslldq	zmm31,zmm4,8
	vpxorq	zmm0,zmm0,zmm30
	vpxorq	zmm3,zmm3,zmm31
	; Horizontal XOR of the four 128-bit lanes: 512 -> 256 -> 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit value xmm0:xmm3 to 128 bits using the POLY2 constant.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First phase: fold the low half (xmm3) into xmm4.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second phase; xmm14 = xmm14 ^ xmm5 ^ xmm0 is the new running value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

	jmp	NEAR $L$_small_initial_compute_done_302
$L$_small_initial_partial_block_302:
; Taken when r13 < 16 after "sub r13,16 * (9 - 1)", i.e. the ninth block is
; shorter than 16 bytes. Only the eight complete byte-reflected blocks
; (zmm17, zmm19) are multiplied here; the short block (xmm20 / xmm7) is not
; multiplied and is XORed into the result afterwards.
; Result: xmm14 = 128-bit folded/reduced value.
; NOTE(review): the entries read from rdx+224..rdx+351 are presumably
; precomputed hash-key powers - confirm against the context layout.








	; Record the leftover byte count at [r8] and save xmm11 (the unmasked
	; ninth output block) at rdx+16.
	; NOTE(review): presumably state kept so a later call can finish this
	; block - TODO confirm with the caller.
	mov	QWORD[r8],r13
	vmovdqu64	XMMWORD[16+rdx],xmm11
	; Blocks 1..4 times the four entries at rdx+224.
	; imm 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = the two cross terms.
	vmovdqu64	zmm1,ZMMWORD[224+rdx]
	vpclmulqdq	zmm0,zmm17,zmm1,0x11
	vpclmulqdq	zmm3,zmm17,zmm1,0x00
	vpclmulqdq	zmm4,zmm17,zmm1,0x01
	vpclmulqdq	zmm5,zmm17,zmm1,0x10
	; Blocks 5..8 times the four entries at rdx+288.
	vmovdqu64	zmm1,ZMMWORD[288+rdx]
	vpclmulqdq	zmm8,zmm19,zmm1,0x11
	vpclmulqdq	zmm22,zmm19,zmm1,0x00
	vpclmulqdq	zmm30,zmm19,zmm1,0x01
	vpclmulqdq	zmm31,zmm19,zmm1,0x10
	vpxorq	zmm8,zmm0,zmm8
	vpxorq	zmm22,zmm3,zmm22
	vpxorq	zmm30,zmm4,zmm30
	vpxorq	zmm31,zmm5,zmm31

	; Add the sums computed from the stacked data by the interleaved code:
	; zmm26 = cross, zmm24 = high, zmm25 = low.
	vpxorq	zmm30,zmm30,zmm26
	vpxorq	zmm8,zmm8,zmm24
	vpxorq	zmm22,zmm22,zmm25

	; Merge both cross terms, then split them into the high (zmm0) and low
	; (zmm3) halves with per-lane 8-byte shifts.
	vpxorq	zmm30,zmm30,zmm31
	vpsrldq	zmm4,zmm30,8
	vpslldq	zmm5,zmm30,8
	vpxorq	zmm0,zmm8,zmm4
	vpxorq	zmm3,zmm22,zmm5
	; Horizontal XOR of the four 128-bit lanes: 512 -> 256 -> 128 bits.
	vextracti64x4	ymm30,zmm0,1
	vpxorq	ymm0,ymm0,ymm30
	vextracti32x4	xmm30,ymm0,1
	vpxorq	xmm0,xmm0,xmm30
	vextracti64x4	ymm31,zmm3,1
	vpxorq	ymm3,ymm3,ymm31
	vextracti32x4	xmm31,ymm3,1
	vpxorq	xmm3,xmm3,xmm31
	; Reduce the 256-bit value xmm0:xmm3 to 128 bits using the POLY2 constant.
	vmovdqa64	xmm1,XMMWORD[POLY2]


	; First phase: fold the low half (xmm3) into xmm4.
	vpclmulqdq	xmm4,xmm1,xmm3,0x01
	vpslldq	xmm4,xmm4,8
	vpxorq	xmm4,xmm3,xmm4


	; Second phase; xmm14 = xmm14 ^ xmm5 ^ xmm0 is the new running value.
	vpclmulqdq	xmm5,xmm1,xmm4,0x00
	vpsrldq	xmm5,xmm5,4
	vpclmulqdq	xmm14,xmm1,xmm4,0x10
	vpslldq	xmm14,xmm14,4
	vpternlogq	xmm14,xmm5,xmm0,0x96

$L$_small_initial_compute_done_302:
; Join point of the full-block and partial-block paths of the 9-block case.
; If bytes are still outstanding (r13 != 0), XOR xmm7 into xmm14. xmm7 is
; the byte-reflected, zero-padded ninth block.

	; "or r13,r13" leaves r13 unchanged and sets ZF when it is zero.
	or	r13,r13
	je	NEAR $L$_after_reduction_302
	vpxorq	xmm14,xmm14,xmm7
$L$_after_reduction_302:
	jmp	