diff mbox series

[v2,5/6] crypto/fsl: instantiate the RNG with prediction resistance

Message ID 20200604154610.19810-6-michael@walle.cc
State Superseded
Headers show
Series crypto/fsl: add RNG support | expand

Commit Message

Michael Walle June 4, 2020, 3:46 p.m. UTC
If it is already instantiated, tear it down first and then reinstantiate
it again with prediction resistance.

Signed-off-by: Michael Walle <michael at walle.cc>
---
 drivers/crypto/fsl/desc.h    |  2 ++
 drivers/crypto/fsl/jobdesc.c | 12 ++++++-
 drivers/crypto/fsl/jobdesc.h |  2 ++
 drivers/crypto/fsl/jr.c      | 66 ++++++++++++++++++++++++++++++++----
 include/fsl_sec.h            |  7 ++--
 5 files changed, 78 insertions(+), 11 deletions(-)

Comments

Michael Walle June 4, 2020, 4:16 p.m. UTC | #1
Am 2020-06-04 17:46, schrieb Michael Walle:
> If it is already instantiated tear it down first and then reinstanciate
> it again with prediction resistance.
> 
> Signed-off-by: Michael Walle <michael at walle.cc>
> ---
>  drivers/crypto/fsl/desc.h    |  2 ++
>  drivers/crypto/fsl/jobdesc.c | 12 ++++++-
>  drivers/crypto/fsl/jobdesc.h |  2 ++
>  drivers/crypto/fsl/jr.c      | 66 ++++++++++++++++++++++++++++++++----
>  include/fsl_sec.h            |  7 ++--
>  5 files changed, 78 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/crypto/fsl/desc.h b/drivers/crypto/fsl/desc.h
> index 11ad506829..3589e6ea02 100644
> --- a/drivers/crypto/fsl/desc.h
> +++ b/drivers/crypto/fsl/desc.h
> @@ -520,6 +520,8 @@
>  #define OP_ALG_ICV_OFF		(0 << OP_ALG_ICV_SHIFT)
>  #define OP_ALG_ICV_ON		(1 << OP_ALG_ICV_SHIFT)
> 
> +#define OP_ALG_PR_ON		0x02
> +
>  #define OP_ALG_DIR_SHIFT	0
>  #define OP_ALG_DIR_MASK		1
>  #define OP_ALG_DECRYPT		0
> diff --git a/drivers/crypto/fsl/jobdesc.c 
> b/drivers/crypto/fsl/jobdesc.c
> index 6102e9c06b..d9554c550b 100644
> --- a/drivers/crypto/fsl/jobdesc.c
> +++ b/drivers/crypto/fsl/jobdesc.c
> @@ -266,7 +266,8 @@ void inline_cnstr_jobdesc_rng_instantiation(u32
> *desc, int handle, int do_sk)
> 
>  	/* INIT RNG in non-test mode */
>  	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
> -			(handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT);
> +			 (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT |
> +			 OP_ALG_PR_ON);
> 
>  	/* For SH0, Secure Keys must be generated as well */
>  	if (!handle && do_sk) {
> @@ -286,6 +287,15 @@ void inline_cnstr_jobdesc_rng_instantiation(u32
> *desc, int handle, int do_sk)
>  	}
>  }
> 
> +/* Descriptor for deinstantiation of the RNG block. */
> +void inline_cnstr_jobdesc_rng_deinstantiation(u32 *desc, int handle)
> +{
> +	init_job_desc(desc, 0);
> +
> +	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
> +			 (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INITFINAL);
> +}
> +
>  /* Change key size to bytes form bits in calling function*/
>  void inline_cnstr_jobdesc_pkha_rsaexp(uint32_t *desc,
>  				      struct pk_in_params *pkin, uint8_t *out,
> diff --git a/drivers/crypto/fsl/jobdesc.h 
> b/drivers/crypto/fsl/jobdesc.h
> index 14b2a119d7..5185ddd535 100644
> --- a/drivers/crypto/fsl/jobdesc.h
> +++ b/drivers/crypto/fsl/jobdesc.h
> @@ -41,6 +41,8 @@ void inline_cnstr_jobdesc_blob_decap(uint32_t *desc,
> uint8_t *key_idnfr,
> 
>  void inline_cnstr_jobdesc_rng_instantiation(u32 *desc, int handle, int 
> do_sk);
> 
> +void inline_cnstr_jobdesc_rng_deinstantiation(u32 *desc, int handle);
> +
>  void inline_cnstr_jobdesc_pkha_rsaexp(uint32_t *desc,
>  				      struct pk_in_params *pkin, uint8_t *out,
>  				      uint32_t out_siz);
> diff --git a/drivers/crypto/fsl/jr.c b/drivers/crypto/fsl/jr.c
> index 42865a6cd7..14f9227b37 100644
> --- a/drivers/crypto/fsl/jr.c
> +++ b/drivers/crypto/fsl/jr.c
> @@ -446,6 +446,51 @@ int sec_reset(void)
>  	return sec_reset_idx(0);
>  }
>  #ifndef CONFIG_SPL_BUILD
> +static int deinstantiate_rng(u8 sec_idx, int state_handle_mask)
> +{
> +	u32 *desc;
> +	int sh_idx, ret = 0;
> +	int desc_size = sizeof(u32) * 3;

This should be 2.

> +
> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
> +	if (!desc) {
> +		debug("cannot allocate RNG init descriptor memory\n");
> +		return -ENOMEM;
> +	}
> +
> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
> +		/*
> +		 * If the corresponding bit is set, then it means the state
> +		 * handle was initialized by us, and thus it needs to be
> +		 * deinitialized as well
> +		 */
> +
> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
> +			/*
> +			 * Create the descriptor for deinstantating this state
> +			 * handle.
> +			 */
> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
> +			flush_dcache_range((unsigned long)desc,
> +					   (unsigned long)desc + desc_size);
> +
> +			ret = run_descriptor_jr_idx(desc, sec_idx);
> +			if (ret) {
> +				printf("SEC%u:  RNG4 SH%d deinstantiation failed with error 
> 0x%x\n",
> +				       sec_idx, sh_idx, ret);
> +				ret = -EIO;
> +				break;
> +			}
> +
> +			printf("SEC%u:  Deinstantiated RNG4 SH%d\n",
> +			       sec_idx, sh_idx);
> +		}
> +	}
> +
> +	free(desc);
> +	return ret;
> +}
> +
>  static int instantiate_rng(u8 sec_idx, int gen_sk)
>  {
>  	u32 *desc;
> @@ -466,9 +511,18 @@ static int instantiate_rng(u8 sec_idx, int gen_sk)
>  		 * If the corresponding bit is set, this state handle
>  		 * was initialized by somebody else, so it's left alone.
>  		 */
> -		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
> -		if (rdsta_val & (1 << sh_idx))
> -			continue;
> +		rdsta_val = sec_in32(&rng->rdsta);
> +		if (rdsta_val & (RDSTA_IF(sh_idx) | RDSTA_PR(sh_idx))) {
> +			if (rdsta_val & RDSTA_PR(sh_idx))
> +				continue;
> +
> +			printf("SEC%u:  RNG4 SH%d was instantiated w/o prediction
> resistance. Tearing it down\n",
> +			       sec_idx, sh_idx);
> +
> +			ret = deinstantiate_rng(sec_idx, RDSTA_IF(sh_idx));
> +			if (ret)
> +				break;
> +		}
> 
>  		inline_cnstr_jobdesc_rng_instantiation(desc, sh_idx, gen_sk);
>  		size = roundup(sizeof(uint32_t) * 6, ARCH_DMA_MINALIGN);
> @@ -481,8 +535,8 @@ static int instantiate_rng(u8 sec_idx, int gen_sk)
>  			printf("SEC%u:  RNG4 SH%d instantiation failed with error 0x%x\n",
>  			       sec_idx, sh_idx, ret);
> 
> -		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
> -		if (!(rdsta_val & (1 << sh_idx))) {
> +		rdsta_val = sec_in32(&rng->rdsta);
> +		if (!(rdsta_val & RDSTA_IF(sh_idx))) {
>  			free(desc);
>  			return -1;
>  		}
> @@ -554,7 +608,7 @@ static int rng_init(uint8_t sec_idx)
> 
>  	gen_sk = !(sec_in32(&rng->rdsta) & RDSTA_SKVN);
>  	do {
> -		inst_handles = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
> +		inst_handles = sec_in32(&rng->rdsta) & RDSTA_MASK;
> 
>  		/*
>  		 * If either of the SH's were instantiated by somebody else
> diff --git a/include/fsl_sec.h b/include/fsl_sec.h
> index 64b8751f2d..1c6f1eb23e 100644
> --- a/include/fsl_sec.h
> +++ b/include/fsl_sec.h
> @@ -65,10 +65,9 @@ struct rng4tst {
>  		u32 rtfreqcnt;	/* PRGM=0: freq. count register */
>  	};
>  	u32 rsvd1[40];
> -#define RNG_STATE0_HANDLE_INSTANTIATED	0x00000001
> -#define RNG_STATE1_HANDLE_INSTANTIATED	0x00000002
> -#define RNG_STATE_HANDLE_MASK	\
> -	(RNG_STATE0_HANDLE_INSTANTIATED | RNG_STATE1_HANDLE_INSTANTIATED)
> +#define RDSTA_IF(idx) (0x00000001 << (idx))
> +#define RDSTA_PR(idx) (0x00000010 << (idx))
> +#define RDSTA_MASK (RDSTA_PR(1) | RDSTA_PR(0) | RDSTA_IF(1) | 
> RDSTA_IF(0))
>  #define RDSTA_SKVN 0x40000000
>  	u32 rdsta;		/*RNG DRNG Status Register*/
>  	u32 rsvd2[15];
Horia Geanta June 17, 2020, 7:15 p.m. UTC | #2
On 6/4/2020 6:48 PM, Michael Walle wrote:
> +static int deinstantiate_rng(u8 sec_idx, int state_handle_mask)
> +{
> +	u32 *desc;
> +	int sh_idx, ret = 0;
> +	int desc_size = sizeof(u32) * 3;
As you mentioned, descriptor size should be sizeof(u32) * 2.

> +
> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
> +	if (!desc) {
> +		debug("cannot allocate RNG init descriptor memory\n");
> +		return -ENOMEM;
> +	}
> +
> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
> +		/*
> +		 * If the corresponding bit is set, then it means the state
> +		 * handle was initialized by us, and thus it needs to be
> +		 * deinitialized as well
> +		 */
> +
> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
> +			/*
> +			 * Create the descriptor for deinstantating this state
> +			 * handle.
> +			 */
> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
> +			flush_dcache_range((unsigned long)desc,
> +					   (unsigned long)desc + desc_size);
Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of desc_size?

> @@ -466,9 +511,18 @@ static int instantiate_rng(u8 sec_idx, int gen_sk)
>  		 * If the corresponding bit is set, this state handle
>  		 * was initialized by somebody else, so it's left alone.
>  		 */
> -		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
> -		if (rdsta_val & (1 << sh_idx))
> -			continue;
> +		rdsta_val = sec_in32(&rng->rdsta);
> +		if (rdsta_val & (RDSTA_IF(sh_idx) | RDSTA_PR(sh_idx))) {
Adding RDSTA_PR(sh_idx) to the mask is not needed,
PR bit is meaningless without IF bit set.

> +			if (rdsta_val & RDSTA_PR(sh_idx))
> +				continue;
Could combine the condition here with the outer if condition:
		if (rdsta_val & RDSTA_IF(sh_idx) && !(rdsta_val & RDSTA_PR(sh_idx))) {

> +
> +			printf("SEC%u:  RNG4 SH%d was instantiated w/o prediction resistance. Tearing it down\n",
> +			       sec_idx, sh_idx);
> +
> +			ret = deinstantiate_rng(sec_idx, RDSTA_IF(sh_idx));
> +			if (ret)
> +				break;
> +		}

Horia
Michael Walle June 17, 2020, 8:48 p.m. UTC | #3
Am 2020-06-17 21:15, schrieb Horia Geantă:
> On 6/4/2020 6:48 PM, Michael Walle wrote:
>> +static int deinstantiate_rng(u8 sec_idx, int state_handle_mask)
>> +{
>> +	u32 *desc;
>> +	int sh_idx, ret = 0;
>> +	int desc_size = sizeof(u32) * 3;
> As you mentioned, descriptor size should be sizeof(u32) * 2.
> 
>> +
>> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>> +	if (!desc) {
>> +		debug("cannot allocate RNG init descriptor memory\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
>> +		/*
>> +		 * If the corresponding bit is set, then it means the state
>> +		 * handle was initialized by us, and thus it needs to be
>> +		 * deinitialized as well
>> +		 */
>> +
>> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
>> +			/*
>> +			 * Create the descriptor for deinstantating this state
>> +			 * handle.
>> +			 */
>> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
>> +			flush_dcache_range((unsigned long)desc,
>> +					   (unsigned long)desc + desc_size);
> Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of 
> desc_size?

I've seen the same idioms sometimes, but it wasn't clear to me why that 
would
be needed; the hardware only uses the desc_size, right?

>> @@ -466,9 +511,18 @@ static int instantiate_rng(u8 sec_idx, int 
>> gen_sk)
>>  		 * If the corresponding bit is set, this state handle
>>  		 * was initialized by somebody else, so it's left alone.
>>  		 */
>> -		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
>> -		if (rdsta_val & (1 << sh_idx))
>> -			continue;
>> +		rdsta_val = sec_in32(&rng->rdsta);
>> +		if (rdsta_val & (RDSTA_IF(sh_idx) | RDSTA_PR(sh_idx))) {
> Adding RDSTA_PR(sh_idx) to the mask is not needed,
> PR bit is meaningless without IF bit set.

Ok.

> 
>> +			if (rdsta_val & RDSTA_PR(sh_idx))
>> +				continue;
> Could combine the condition here with the outer if condition:
> 		if (rdsta_val & RDSTA_IF(sh_idx) && !(rdsta_val & RDSTA_PR(sh_idx))) 
> {

If we keep that it is consistent with the linux driver. So I'd vote to
keep it. Also the continue statement would be missing and thus the
rng would be instantiated again. Or am I missing something?

-michael
Horia Geanta June 19, 2020, 4:37 p.m. UTC | #4
On 6/17/2020 11:48 PM, Michael Walle wrote:
> Am 2020-06-17 21:15, schrieb Horia Geantă:
>> On 6/4/2020 6:48 PM, Michael Walle wrote:
>>> +
>>> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>>> +	if (!desc) {
>>> +		debug("cannot allocate RNG init descriptor memory\n");
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
>>> +		/*
>>> +		 * If the corresponding bit is set, then it means the state
>>> +		 * handle was initialized by us, and thus it needs to be
>>> +		 * deinitialized as well
>>> +		 */
>>> +
>>> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
>>> +			/*
>>> +			 * Create the descriptor for deinstantating this state
>>> +			 * handle.
>>> +			 */
>>> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
>>> +			flush_dcache_range((unsigned long)desc,
>>> +					   (unsigned long)desc + desc_size);
>> Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of 
>> desc_size?
> 
> I've seen the same idioms sometimes, but it wasn't clear to me why that 
> would
> be needed; the hardware only uses the desc_size, right?
> 
Yes, HW will use only [desc, desc + desc_size].

I think this is needed to avoid cacheline sharing issues
on non-coherent platforms: CPU needs to make sure a larger area
is written back to memory and corresponding cache lines are invalidated.

Looking at flush_dcache_range() implementation, it does its own rounding,
based on CTR_EL0[DminLine] - "smallest data cache line size".
I guess this value might be smaller than ARCH_DMA_MINALIGN,
hence the explicit rounding to ARCH_DMA_MINALIGN is needed.

>>> @@ -466,9 +511,18 @@ static int instantiate_rng(u8 sec_idx, int 
>>> gen_sk)
>>>  		 * If the corresponding bit is set, this state handle
>>>  		 * was initialized by somebody else, so it's left alone.
>>>  		 */
>>> -		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
>>> -		if (rdsta_val & (1 << sh_idx))
>>> -			continue;
>>> +		rdsta_val = sec_in32(&rng->rdsta);
>>> +		if (rdsta_val & (RDSTA_IF(sh_idx) | RDSTA_PR(sh_idx))) {
>> Adding RDSTA_PR(sh_idx) to the mask is not needed,
>> PR bit is meaningless without IF bit set.
> 
> Ok.
> 
>>
>>> +			if (rdsta_val & RDSTA_PR(sh_idx))
>>> +				continue;
>> Could combine the condition here with the outer if condition:
>> 		if (rdsta_val & RDSTA_IF(sh_idx) && !(rdsta_val & RDSTA_PR(sh_idx))) 
>> {
> 
> If we keep that it is consistent with the linux driver. So I'd vote to
> keep it. Also the continue statement would be missing and thus the
> rng would be instantiated again. Or am I missing something?
> 
You are correct, let's keep this as is.

Thanks,
Horia
Horia Geanta June 19, 2020, 4:54 p.m. UTC | #5
On 6/19/2020 7:37 PM, Horia Geanta wrote:
> On 6/17/2020 11:48 PM, Michael Walle wrote:
>> Am 2020-06-17 21:15, schrieb Horia Geantă:
>>> On 6/4/2020 6:48 PM, Michael Walle wrote:
>>>> +
>>>> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>>>> +	if (!desc) {
>>>> +		debug("cannot allocate RNG init descriptor memory\n");
>>>> +		return -ENOMEM;
>>>> +	}
>>>> +
>>>> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
>>>> +		/*
>>>> +		 * If the corresponding bit is set, then it means the state
>>>> +		 * handle was initialized by us, and thus it needs to be
>>>> +		 * deinitialized as well
>>>> +		 */
>>>> +
>>>> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
>>>> +			/*
>>>> +			 * Create the descriptor for deinstantating this state
>>>> +			 * handle.
>>>> +			 */
>>>> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
>>>> +			flush_dcache_range((unsigned long)desc,
>>>> +					   (unsigned long)desc + desc_size);
>>> Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of 
>>> desc_size?
>>
>> I've seen the same idioms sometimes, but it wasn't clear to me why that 
>> would
>> be needed; the hardware only uses the desc_size, right?
>>
> Yes, HW will use only [desc, desc + desc_size].
> 
> I think this is needed to avoid cacheline sharing issues
> on non-coherent platforms: CPU needs to make sure a larger area
> is written back to memory and corresponding cache lines are invalidated.
> 
> Looking at flush_dcache_range() implementation, it does its own rounding,
> based on CTR_EL0[DminLine] - "smallest data cache line size".
> I guess this value might be smaller than ARCH_DMA_MINALIGN,
> hence the explicit rounding to ARCH_DMA_MINALIGN is needed.
> 
Btw, I think
	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
needs to be replaced with
	desc = malloc_cache_aligned(desc_size);

Horia
Michael Walle June 19, 2020, 7:02 p.m. UTC | #6
Am 2020-06-19 18:54, schrieb Horia Geantă:
> On 6/19/2020 7:37 PM, Horia Geanta wrote:
>> On 6/17/2020 11:48 PM, Michael Walle wrote:
>>> Am 2020-06-17 21:15, schrieb Horia Geantă:
>>>> On 6/4/2020 6:48 PM, Michael Walle wrote:
>>>>> +
>>>>> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>>>>> +	if (!desc) {
>>>>> +		debug("cannot allocate RNG init descriptor memory\n");
>>>>> +		return -ENOMEM;
>>>>> +	}
>>>>> +
>>>>> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
>>>>> +		/*
>>>>> +		 * If the corresponding bit is set, then it means the state
>>>>> +		 * handle was initialized by us, and thus it needs to be
>>>>> +		 * deinitialized as well
>>>>> +		 */
>>>>> +
>>>>> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
>>>>> +			/*
>>>>> +			 * Create the descriptor for deinstantating this state
>>>>> +			 * handle.
>>>>> +			 */
>>>>> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
>>>>> +			flush_dcache_range((unsigned long)desc,
>>>>> +					   (unsigned long)desc + desc_size);
>>>> Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of
>>>> desc_size?
>>> 
>>> I've seen the same idioms sometimes, but it wasn't clear to me why 
>>> that
>>> would
>>> be needed; the hardware only uses the desc_size, right?
>>> 
>> Yes, HW will use only [desc, desc + desc_size].
>> 
>> I think this is needed to avoid cacheline sharing issues
>> on non-coherent platforms: CPU needs to make sure a larger area
>> is written back to memory and corresponding cache lines are 
>> invalidated.
>> 
>> Looking at flush_dcache_range() implementation, it does its own 
>> rounding,
>> based on CTR_EL0[DminLine] - "smallest data cache line size".
>> I guess this value might be smaller than ARCH_DMA_MINALIGN,
>> hence the explicit rounding to ARCH_DMA_MINALIGN is needed.
>> 
> Btw, I think
> 	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
> needs to be replaced with
> 	desc = malloc_cache_aligned(desc_size);

But then the rounding is not needed, right? I mean there can't
be any other malloc() which might allocate memory in the same
cache line.

-michael
Horia Geanta June 22, 2020, 2:30 p.m. UTC | #7
On 6/19/2020 10:02 PM, Michael Walle wrote:
> Am 2020-06-19 18:54, schrieb Horia Geantă:
>> On 6/19/2020 7:37 PM, Horia Geanta wrote:
>>> On 6/17/2020 11:48 PM, Michael Walle wrote:
>>>> Am 2020-06-17 21:15, schrieb Horia Geantă:
>>>>> On 6/4/2020 6:48 PM, Michael Walle wrote:
>>>>>> +
>>>>>> +	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>>>>>> +	if (!desc) {
>>>>>> +		debug("cannot allocate RNG init descriptor memory\n");
>>>>>> +		return -ENOMEM;
>>>>>> +	}
>>>>>> +
>>>>>> +	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
>>>>>> +		/*
>>>>>> +		 * If the corresponding bit is set, then it means the state
>>>>>> +		 * handle was initialized by us, and thus it needs to be
>>>>>> +		 * deinitialized as well
>>>>>> +		 */
>>>>>> +
>>>>>> +		if (state_handle_mask & RDSTA_IF(sh_idx)) {
>>>>>> +			/*
>>>>>> +			 * Create the descriptor for deinstantating this state
>>>>>> +			 * handle.
>>>>>> +			 */
>>>>>> +			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
>>>>>> +			flush_dcache_range((unsigned long)desc,
>>>>>> +					   (unsigned long)desc + desc_size);
>>>>> Shouldn't this be roundup(desc_size, ARCH_DMA_MINALIGN) instead of
>>>>> desc_size?
>>>>
>>>> I've seen the same idioms sometimes, but it wasn't clear to me why 
>>>> that
>>>> would
>>>> be needed; the hardware only uses the desc_size, right?
>>>>
>>> Yes, HW will use only [desc, desc + desc_size].
>>>
>>> I think this is needed to avoid cacheline sharing issues
>>> on non-coherent platforms: CPU needs to make sure a larger area
>>> is written back to memory and corresponding cache lines are 
>>> invalidated.
>>>
>>> Looking at flush_dcache_range() implementation, it does its own 
>>> rounding,
>>> based on CTR_EL0[DminLine] - "smallest data cache line size".
>>> I guess this value might be smaller than ARCH_DMA_MINALIGN,
>>> hence the explicit rounding to ARCH_DMA_MINALIGN is needed.
>>>
>> Btw, I think
>> 	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
>> needs to be replaced with
>> 	desc = malloc_cache_aligned(desc_size);
> 
> But then the rounding is not needed, right? I mean there can't
> be any other malloc() which might allocate memory in the same
> cache line.
> 
Yes, in this case the rounding is no longer needed.

Horia
diff mbox series

Patch

diff --git a/drivers/crypto/fsl/desc.h b/drivers/crypto/fsl/desc.h
index 11ad506829..3589e6ea02 100644
--- a/drivers/crypto/fsl/desc.h
+++ b/drivers/crypto/fsl/desc.h
@@ -520,6 +520,8 @@ 
 #define OP_ALG_ICV_OFF		(0 << OP_ALG_ICV_SHIFT)
 #define OP_ALG_ICV_ON		(1 << OP_ALG_ICV_SHIFT)
 
+#define OP_ALG_PR_ON		0x02
+
 #define OP_ALG_DIR_SHIFT	0
 #define OP_ALG_DIR_MASK		1
 #define OP_ALG_DECRYPT		0
diff --git a/drivers/crypto/fsl/jobdesc.c b/drivers/crypto/fsl/jobdesc.c
index 6102e9c06b..d9554c550b 100644
--- a/drivers/crypto/fsl/jobdesc.c
+++ b/drivers/crypto/fsl/jobdesc.c
@@ -266,7 +266,8 @@  void inline_cnstr_jobdesc_rng_instantiation(u32 *desc, int handle, int do_sk)
 
 	/* INIT RNG in non-test mode */
 	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
-			(handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT);
+			 (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT |
+			 OP_ALG_PR_ON);
 
 	/* For SH0, Secure Keys must be generated as well */
 	if (!handle && do_sk) {
@@ -286,6 +287,15 @@  void inline_cnstr_jobdesc_rng_instantiation(u32 *desc, int handle, int do_sk)
 	}
 }
 
+/* Descriptor for deinstantiation of the RNG block. */
+void inline_cnstr_jobdesc_rng_deinstantiation(u32 *desc, int handle)
+{
+	init_job_desc(desc, 0);
+
+	append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
+			 (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INITFINAL);
+}
+
 /* Change key size to bytes form bits in calling function*/
 void inline_cnstr_jobdesc_pkha_rsaexp(uint32_t *desc,
 				      struct pk_in_params *pkin, uint8_t *out,
diff --git a/drivers/crypto/fsl/jobdesc.h b/drivers/crypto/fsl/jobdesc.h
index 14b2a119d7..5185ddd535 100644
--- a/drivers/crypto/fsl/jobdesc.h
+++ b/drivers/crypto/fsl/jobdesc.h
@@ -41,6 +41,8 @@  void inline_cnstr_jobdesc_blob_decap(uint32_t *desc, uint8_t *key_idnfr,
 
 void inline_cnstr_jobdesc_rng_instantiation(u32 *desc, int handle, int do_sk);
 
+void inline_cnstr_jobdesc_rng_deinstantiation(u32 *desc, int handle);
+
 void inline_cnstr_jobdesc_pkha_rsaexp(uint32_t *desc,
 				      struct pk_in_params *pkin, uint8_t *out,
 				      uint32_t out_siz);
diff --git a/drivers/crypto/fsl/jr.c b/drivers/crypto/fsl/jr.c
index 42865a6cd7..14f9227b37 100644
--- a/drivers/crypto/fsl/jr.c
+++ b/drivers/crypto/fsl/jr.c
@@ -446,6 +446,51 @@  int sec_reset(void)
 	return sec_reset_idx(0);
 }
 #ifndef CONFIG_SPL_BUILD
+static int deinstantiate_rng(u8 sec_idx, int state_handle_mask)
+{
+	u32 *desc;
+	int sh_idx, ret = 0;
+	int desc_size = sizeof(u32) * 3;
+
+	desc = memalign(ARCH_DMA_MINALIGN, desc_size);
+	if (!desc) {
+		debug("cannot allocate RNG init descriptor memory\n");
+		return -ENOMEM;
+	}
+
+	for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
+		/*
+		 * If the corresponding bit is set, then it means the state
+		 * handle was initialized by us, and thus it needs to be
+		 * deinitialized as well
+		 */
+
+		if (state_handle_mask & RDSTA_IF(sh_idx)) {
+			/*
+			 * Create the descriptor for deinstantating this state
+			 * handle.
+			 */
+			inline_cnstr_jobdesc_rng_deinstantiation(desc, sh_idx);
+			flush_dcache_range((unsigned long)desc,
+					   (unsigned long)desc + desc_size);
+
+			ret = run_descriptor_jr_idx(desc, sec_idx);
+			if (ret) {
+				printf("SEC%u:  RNG4 SH%d deinstantiation failed with error 0x%x\n",
+				       sec_idx, sh_idx, ret);
+				ret = -EIO;
+				break;
+			}
+
+			printf("SEC%u:  Deinstantiated RNG4 SH%d\n",
+			       sec_idx, sh_idx);
+		}
+	}
+
+	free(desc);
+	return ret;
+}
+
 static int instantiate_rng(u8 sec_idx, int gen_sk)
 {
 	u32 *desc;
@@ -466,9 +511,18 @@  static int instantiate_rng(u8 sec_idx, int gen_sk)
 		 * If the corresponding bit is set, this state handle
 		 * was initialized by somebody else, so it's left alone.
 		 */
-		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
-		if (rdsta_val & (1 << sh_idx))
-			continue;
+		rdsta_val = sec_in32(&rng->rdsta);
+		if (rdsta_val & (RDSTA_IF(sh_idx) | RDSTA_PR(sh_idx))) {
+			if (rdsta_val & RDSTA_PR(sh_idx))
+				continue;
+
+			printf("SEC%u:  RNG4 SH%d was instantiated w/o prediction resistance. Tearing it down\n",
+			       sec_idx, sh_idx);
+
+			ret = deinstantiate_rng(sec_idx, RDSTA_IF(sh_idx));
+			if (ret)
+				break;
+		}
 
 		inline_cnstr_jobdesc_rng_instantiation(desc, sh_idx, gen_sk);
 		size = roundup(sizeof(uint32_t) * 6, ARCH_DMA_MINALIGN);
@@ -481,8 +535,8 @@  static int instantiate_rng(u8 sec_idx, int gen_sk)
 			printf("SEC%u:  RNG4 SH%d instantiation failed with error 0x%x\n",
 			       sec_idx, sh_idx, ret);
 
-		rdsta_val = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
-		if (!(rdsta_val & (1 << sh_idx))) {
+		rdsta_val = sec_in32(&rng->rdsta);
+		if (!(rdsta_val & RDSTA_IF(sh_idx))) {
 			free(desc);
 			return -1;
 		}
@@ -554,7 +608,7 @@  static int rng_init(uint8_t sec_idx)
 
 	gen_sk = !(sec_in32(&rng->rdsta) & RDSTA_SKVN);
 	do {
-		inst_handles = sec_in32(&rng->rdsta) & RNG_STATE_HANDLE_MASK;
+		inst_handles = sec_in32(&rng->rdsta) & RDSTA_MASK;
 
 		/*
 		 * If either of the SH's were instantiated by somebody else
diff --git a/include/fsl_sec.h b/include/fsl_sec.h
index 64b8751f2d..1c6f1eb23e 100644
--- a/include/fsl_sec.h
+++ b/include/fsl_sec.h
@@ -65,10 +65,9 @@  struct rng4tst {
 		u32 rtfreqcnt;	/* PRGM=0: freq. count register */
 	};
 	u32 rsvd1[40];
-#define RNG_STATE0_HANDLE_INSTANTIATED	0x00000001
-#define RNG_STATE1_HANDLE_INSTANTIATED	0x00000002
-#define RNG_STATE_HANDLE_MASK	\
-	(RNG_STATE0_HANDLE_INSTANTIATED | RNG_STATE1_HANDLE_INSTANTIATED)
+#define RDSTA_IF(idx) (0x00000001 << (idx))
+#define RDSTA_PR(idx) (0x00000010 << (idx))
+#define RDSTA_MASK (RDSTA_PR(1) | RDSTA_PR(0) | RDSTA_IF(1) | RDSTA_IF(0))
 #define RDSTA_SKVN 0x40000000
 	u32 rdsta;		/*RNG DRNG Status Register*/
 	u32 rsvd2[15];