
[Part2,v6,07/49] x86/sev: Invalid pages from direct map when adding it to RMP table

Message ID 243778c282cd55a554af9c11d2ecd3ff9ea6820f.1655761627.git.ashish.kalra@amd.com
State New
Series: Add AMD Secure Nested Paging (SEV-SNP)

Commit Message

Ashish Kalra June 20, 2022, 11:03 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

The integrity guarantee of SEV-SNP is enforced through the RMP table.
The RMP is used with standard x86 and IOMMU page tables to enforce memory
restrictions and page access rights. The RMP check is enforced as soon as
SEV-SNP is enabled globally in the system. When hardware encounters an
RMP checks failure, it raises a page-fault exception.

The rmp_make_private() and rmp_make_shared() helpers are used to add
or remove the pages from the RMP table. Improve the rmp_make_private() to
invalid state so that pages cannot be used in the direct-map after its
added in the RMP table, and restore to its default valid permission after
the pages are removed from the RMP table.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/kernel/sev.c | 61 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

Comments

Marc Orr June 24, 2022, 12:06 a.m. UTC | #1
On Mon, Jun 20, 2022 at 4:03 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
>
> From: Brijesh Singh <brijesh.singh@amd.com>
>
> The integrity guarantee of SEV-SNP is enforced through the RMP table.
> The RMP is used with standard x86 and IOMMU page tables to enforce memory
> restrictions and page access rights. The RMP check is enforced as soon as
> SEV-SNP is enabled globally in the system. When hardware encounters an
> RMP checks failure, it raises a page-fault exception.

nit: "RMP checks ..." -> "RMP-check ..."

>
> The rmp_make_private() and rmp_make_shared() helpers are used to add
> or remove the pages from the RMP table. Improve the rmp_make_private() to
> invalid state so that pages cannot be used in the direct-map after its

nit: "invalid state ..." -> "invalidate state ..."
nit: "... after its" -> "... after they're"

(Here, and in the patch subject too.)

> added in the RMP table, and restore to its default valid permission after

nit: "... restore to its ..." -> "... restored to their ..."

> the pages are removed from the RMP table.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kernel/sev.c | 61 ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index f6c64a722e94..734cddd837f5 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2451,10 +2451,42 @@ int psmash(u64 pfn)
>  }
>  EXPORT_SYMBOL_GPL(psmash);
>
> +static int restore_direct_map(u64 pfn, int npages)
> +{
> +       int i, ret = 0;
> +
> +       for (i = 0; i < npages; i++) {
> +               ret = set_direct_map_default_noflush(pfn_to_page(pfn + i));
> +               if (ret)
> +                       goto cleanup;
> +       }
> +
> +cleanup:
> +       WARN(ret > 0, "Failed to restore direct map for pfn 0x%llx\n", pfn + i);
> +       return ret;
> +}
> +
> +static int invalid_direct_map(unsigned long pfn, int npages)

I think we should rename this function to "invalidate_direct_map()".

> +{
> +       int i, ret = 0;
> +
> +       for (i = 0; i < npages; i++) {
> +               ret = set_direct_map_invalid_noflush(pfn_to_page(pfn + i));
> +               if (ret)
> +                       goto cleanup;
> +       }
> +
> +       return 0;
> +
> +cleanup:
> +       restore_direct_map(pfn, i);
> +       return ret;
> +}
> +
>  static int rmpupdate(u64 pfn, struct rmpupdate *val)
>  {
>         unsigned long paddr = pfn << PAGE_SHIFT;
> -       int ret;
> +       int ret, level, npages;
>
>         if (!pfn_valid(pfn))
>                 return -EINVAL;
> @@ -2462,11 +2494,38 @@ static int rmpupdate(u64 pfn, struct rmpupdate *val)
>         if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
>                 return -ENXIO;
>
> +       level = RMP_TO_X86_PG_LEVEL(val->pagesize);
> +       npages = page_level_size(level) / PAGE_SIZE;
> +
> +       /*
> +        * If page is getting assigned in the RMP table then unmap it from the
> +        * direct map.
> +        */
> +       if (val->assigned) {
> +               if (invalid_direct_map(pfn, npages)) {
> +                       pr_err("Failed to unmap pfn 0x%llx pages %d from direct_map\n",
> +                              pfn, npages);
> +                       return -EFAULT;
> +               }
> +       }
> +
>         /* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
>         asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
>                      : "=a"(ret)
>                      : "a"(paddr), "c"((unsigned long)val)
>                      : "memory", "cc");
> +
> +       /*
> +        * Restore the direct map after the page is removed from the RMP table.
> +        */
> +       if (!ret && !val->assigned) {
> +               if (restore_direct_map(pfn, npages)) {
> +                       pr_err("Failed to map pfn 0x%llx pages %d in direct_map\n",
> +                              pfn, npages);
> +                       return -EFAULT;
> +               }
> +       }
> +
>         return ret;
>  }
>
> --
> 2.25.1
>
>
Borislav Petkov July 27, 2022, 5:01 p.m. UTC | #2
On Mon, Jun 20, 2022 at 11:03:07PM +0000, Ashish Kalra wrote:

> Subject: x86/sev: Invalid pages from direct map when adding it to RMP table

"...: Invalidate pages from the direct map when adding them to the RMP table"

> +static int restore_direct_map(u64 pfn, int npages)
> +{
> +	int i, ret = 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		ret = set_direct_map_default_noflush(pfn_to_page(pfn + i));

set_memory_p() ?

> +		if (ret)
> +			goto cleanup;
> +	}
> +
> +cleanup:
> +	WARN(ret > 0, "Failed to restore direct map for pfn 0x%llx\n", pfn + i);

Warn for each pfn?!

That'll flood dmesg mightily.

> +	return ret;
> +}
> +
> +static int invalid_direct_map(unsigned long pfn, int npages)
> +{
> +	int i, ret = 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		ret = set_direct_map_invalid_noflush(pfn_to_page(pfn + i));

As above, set_memory_np() doesn't work here instead of looping over each
page?

> @@ -2462,11 +2494,38 @@ static int rmpupdate(u64 pfn, struct rmpupdate *val)
>  	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
>  		return -ENXIO;
>  
> +	level = RMP_TO_X86_PG_LEVEL(val->pagesize);
> +	npages = page_level_size(level) / PAGE_SIZE;
> +
> +	/*
> +	 * If page is getting assigned in the RMP table then unmap it from the
> +	 * direct map.
> +	 */
> +	if (val->assigned) {
> +		if (invalid_direct_map(pfn, npages)) {
> +			pr_err("Failed to unmap pfn 0x%llx pages %d from direct_map\n",

"Failed to unmap %d pages at pfn 0x... from the direct map\n"

> +			       pfn, npages);
> +			return -EFAULT;
> +		}
> +	}
> +
>  	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
>  	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
>  		     : "=a"(ret)
>  		     : "a"(paddr), "c"((unsigned long)val)
>  		     : "memory", "cc");
> +
> +	/*
> +	 * Restore the direct map after the page is removed from the RMP table.
> +	 */
> +	if (!ret && !val->assigned) {
> +		if (restore_direct_map(pfn, npages)) {
> +			pr_err("Failed to map pfn 0x%llx pages %d in direct_map\n",

"Failed to map %d pages at pfn 0x... into the direct map\n"

Thx.
Ashish Kalra Aug. 1, 2022, 11:57 p.m. UTC | #3

Hello Boris,

>> Subject: x86/sev: Invalid pages from direct map when adding it to RMP 
>> table

>"...: Invalidate pages from the direct map when adding them to the RMP table"
Ok

>> +static int restore_direct_map(u64 pfn, int npages) {
>> +	int i, ret = 0;
>> +
>> +	for (i = 0; i < npages; i++) {
>> +		ret = set_direct_map_default_noflush(pfn_to_page(pfn + i));

>set_memory_p() ?

You mean set_memory_present() ? 

Is there an issue with using set_direct_map_default_noflush()? It is easier to correlate with
this function and its functionality of restoring the page in the kernel direct map.

> +		if (ret)
> +			goto cleanup;
> +	}
> +
> +cleanup:
> +	WARN(ret > 0, "Failed to restore direct map for pfn 0x%llx\n", pfn + 
> +i);

>Warn for each pfn?!

>That'll flood dmesg mightily.

> +	return ret;
> +}
> +
> +static int invalid_direct_map(unsigned long pfn, int npages) {
> +	int i, ret = 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		ret = set_direct_map_invalid_noflush(pfn_to_page(pfn + i));

>As above, set_memory_np() doesn't work here instead of looping over each page?

Yes, set_memory_np() looks more efficient to use instead of looping over each page.

But again, calling set_direct_map_invalid_noflush() is easier to understand from the
calling function's point of view, as it correlates to the functionality of invalidating the
page in the kernel direct map?

>> +	if (val->assigned) {
>> +		if (invalid_direct_map(pfn, npages)) {
>> +			pr_err("Failed to unmap pfn 0x%llx pages %d from direct_map\n",

>"Failed to unmap %d pages at pfn 0x... from the direct map\n"
Ok.

>> +	if (!ret && !val->assigned) {
>> +		if (restore_direct_map(pfn, npages)) {
>> +			pr_err("Failed to map pfn 0x%llx pages %d in direct_map\n",

>"Failed to map %d pages at pfn 0x... into the direct map\n"
Ok.

Thanks,
Ashish
Borislav Petkov Aug. 4, 2022, 12:11 p.m. UTC | #4
On Mon, Aug 01, 2022 at 11:57:09PM +0000, Kalra, Ashish wrote:
> You mean set_memory_present() ?

Right, that.

We have set_memory_np() but set_memory_present(). Talk about
consistence... ;-\

> But again, calling set_direct_map_invalid_noflush() is easier to
> understand from the calling function's point of view as it correlates
> to the functionality of invalidating the page from kernel direct map ?

You mean, we prefer easy to understand to performance?

set_direct_map_invalid_noflush() means crap to me. I have to go look it
up - set memory P or NP is much clearer.

The patch which added those things you consider easier to understand is:

commit d253ca0c3865a8d9a8c01143cf20425e0be4d0ce
Author: Rick Edgecombe <rick.p.edgecombe@intel.com>
Date:   Thu Apr 25 17:11:34 2019 -0700

    x86/mm/cpa: Add set_direct_map_*() functions
    
    Add two new functions set_direct_map_default_noflush() and
    set_direct_map_invalid_noflush() for setting the direct map alias for the
    page to its default valid permissions and to an invalid state that cannot
    be cached in a TLB, respectively. These functions do not flush the TLB.

I don't see how this fits with your use case...

Also, your helpers are called restore_direct_map and
invalidate_direct_map. That's already explaining what this is supposed
to do.
Ashish Kalra Nov. 2, 2022, 3:12 a.m. UTC | #5
Hello Boris,

On 8/4/2022 7:11 AM, Borislav Petkov wrote:
> On Mon, Aug 01, 2022 at 11:57:09PM +0000, Kalra, Ashish wrote:
>> You mean set_memory_present() ?
> 
> Right, that.
> 
> We have set_memory_np() but set_memory_present(). Talk about
> consistence... ;-\

Following up on this, now, set_memory_present() is a static interface,
so we will need to add a new external API like set_memory_p() similar
to set_memory_np().

Also, looking at arch/x86/include/asm/set_memory.h:
..
/*
  * The set_memory_* API can be used to change various attributes of a
  * virtual address range. The attributes include:
  * Cacheability  : UnCached, WriteCombining, WriteThrough, WriteBack
  * Executability : eXecutable, NoteXecutable
  * Read/Write    : ReadOnly, ReadWrite
  * Presence      : NotPresent
  * Encryption    : Encrypted, Decrypted
  *
..
int set_memory_np(unsigned long addr, int numpages);
..

So currently there is no interface defined for changing the attribute of 
a range to present or restoring the range in the direct map.

Thanks,
Ashish
Borislav Petkov Nov. 2, 2022, 11:27 a.m. UTC | #6
On Tue, Nov 01, 2022 at 10:12:35PM -0500, Kalra, Ashish wrote:
> Following up on this, now, set_memory_present() is a static interface,
> so we will need to add a new external API like set_memory_p() similar
> to set_memory_np().

It is called set_memory_p() now and you can "un-static" it. :)

> So currently there is no interface defined for changing the attribute of a
> range to present or restoring the range in the direct map.

No?

static int set_memory_p(unsigned long *addr, int numpages)
{
        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}

:-)
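
For illustration only (not part of the posted series): a minimal sketch of what the two helpers could collapse to if set_memory_p() were un-static'd and used together with set_memory_np(), as suggested above. The wrapper names, the use of pfn_to_kaddr() and the exact signatures are assumptions based on the snippets quoted in this thread, not code from the patch set:

static int invalidate_direct_map(u64 pfn, int npages)
{
	unsigned long vaddr = (unsigned long)pfn_to_kaddr(pfn);

	/* Clear _PAGE_PRESENT for the whole range with a single CPA call. */
	return set_memory_np(vaddr, npages);
}

static int restore_direct_map(u64 pfn, int npages)
{
	unsigned long vaddr = (unsigned long)pfn_to_kaddr(pfn);

	/* Set _PAGE_PRESENT back for the whole range with a single CPA call. */
	return set_memory_p(&vaddr, npages);
}

The asymmetric signatures (set_memory_np() takes the address by value, the static set_memory_p() by pointer) follow the definitions quoted above.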
Michael Roth Dec. 19, 2022, 3 p.m. UTC | #7
On Wed, Jul 27, 2022 at 07:01:34PM +0200, Borislav Petkov wrote:
> On Mon, Jun 20, 2022 at 11:03:07PM +0000, Ashish Kalra wrote:
> 
> > Subject: x86/sev: Invalid pages from direct map when adding it to RMP table
> 
> "...: Invalidate pages from the direct map when adding them to the RMP table"
> 
> > +static int restore_direct_map(u64 pfn, int npages)
> > +{
> > +	int i, ret = 0;
> > +
> > +	for (i = 0; i < npages; i++) {
> > +		ret = set_direct_map_default_noflush(pfn_to_page(pfn + i));
> 
> set_memory_p() ?

We implemented this approach for v7, but it causes a fairly significant
performance regression, particularly for the case for npages > 1 which
this change was meant to optimize.

I still need to dig in a bit but I'm guessing it's related to flushing
behavior.

It would however be nice to have a set_direct_map_default_noflush()
variant that accepted a 'npages' argument, since it would be more
performant here and also would potentially allow for restoring the 2M
direct mapping in some cases. Will look into this more for v8.

-Mike

> 
> > +		if (ret)
> > +			goto cleanup;
> > +	}
> > +
> > +cleanup:
> > +	WARN(ret > 0, "Failed to restore direct map for pfn 0x%llx\n", pfn + i);
> 
> Warn for each pfn?!
> 
> That'll flood dmesg mightily.
> 
> > +	return ret;
> > +}
> > +
> > +static int invalid_direct_map(unsigned long pfn, int npages)
> > +{
> > +	int i, ret = 0;
> > +
> > +	for (i = 0; i < npages; i++) {
> > +		ret = set_direct_map_invalid_noflush(pfn_to_page(pfn + i));
> 
> As above, set_memory_np() doesn't work here instead of looping over each
> page?
> 
> > @@ -2462,11 +2494,38 @@ static int rmpupdate(u64 pfn, struct rmpupdate *val)
> >  	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> >  		return -ENXIO;
> >  
> > +	level = RMP_TO_X86_PG_LEVEL(val->pagesize);
> > +	npages = page_level_size(level) / PAGE_SIZE;
> > +
> > +	/*
> > +	 * If page is getting assigned in the RMP table then unmap it from the
> > +	 * direct map.
> > +	 */
> > +	if (val->assigned) {
> > +		if (invalid_direct_map(pfn, npages)) {
> > +			pr_err("Failed to unmap pfn 0x%llx pages %d from direct_map\n",
> 
> "Failed to unmap %d pages at pfn 0x... from the direct map\n"
> 
> > +			       pfn, npages);
> > +			return -EFAULT;
> > +		}
> > +	}
> > +
> >  	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> >  	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> >  		     : "=a"(ret)
> >  		     : "a"(paddr), "c"((unsigned long)val)
> >  		     : "memory", "cc");
> > +
> > +	/*
> > +	 * Restore the direct map after the page is removed from the RMP table.
> > +	 */
> > +	if (!ret && !val->assigned) {
> > +		if (restore_direct_map(pfn, npages)) {
> > +			pr_err("Failed to map pfn 0x%llx pages %d in direct_map\n",
> 
> "Failed to map %d pages at pfn 0x... into the direct map\n"
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Ashish Kalra Dec. 27, 2022, 9:49 p.m. UTC | #8
Hello Boris,

On 12/19/2022 2:08 PM, Borislav Petkov wrote:
> On Mon, Dec 19, 2022 at 09:00:26AM -0600, Michael Roth wrote:
>> We implemented this approach for v7, but it causes a fairly significant
>> performance regression, particularly for the case for npages > 1 which
>> this change was meant to optimize.
>>
>> I still need to dig in a bit but I'm guessing it's related to flushing
>> behavior.
> 
> Well, AFAICT, change_page_attr_set_clr() flushes once at the end.
> 
> Don't you need to flush when you modify the direct map?
> 

Milan onward, there is H/W support for coherency between mappings of the 
same physical page with different encryption keys, so AFAIK, there 
should be no need to flush during page state transitions, where we 
invoke these direct map interface functions for re-mapping/invalidating 
pages.

I don't know if there is any other reason to flush after modifying
the direct map ?

Thanks,
Ashish
Borislav Petkov Dec. 29, 2022, 5:09 p.m. UTC | #9
On Tue, Dec 27, 2022 at 03:49:39PM -0600, Kalra, Ashish wrote:
> Milan onward,

And before ML there's no SNP, right?

> there is H/W support for coherency between mappings of the
> same physical page with different encryption keys, so AFAIK, there should be
> no need to flush during page state transitions, where we invoke these direct
> map interface functions for re-mapping/invalidating pages.

Yah, that rings a bell.

In any case, the fact that flushing is not needed should be stated
somewhere in text so that it is clear why.

> I don't know if there is any other reason to flush after modifying
> the direct map ?

There's

        /*
         * No need to flush, when we did not set any of the caching
         * attributes:
         */
        cache = !!pgprot2cachemode(mask_set);


Does the above HW cover this case too?

Thx.
Mike Rapoport Dec. 30, 2022, 3:19 p.m. UTC | #10
On Mon, Dec 19, 2022 at 09:08:31PM +0100, Borislav Petkov wrote:
> On Mon, Dec 19, 2022 at 09:00:26AM -0600, Michael Roth wrote:
> > We implemented this approach for v7, but it causes a fairly significant
> > performance regression, particularly for the case for npages > 1 which
> > this change was meant to optimize.
> > 
> > I still need to dig in a bit but I'm guessing it's related to flushing
> > behavior.
> 
> Well, AFAICT, change_page_attr_set_clr() flushes once at the end.
> 
> Don't you need to flush when you modify the direct map?
> 
> > It would however be nice to have a set_direct_map_default_noflush()
> > variant that accepted a 'npages' argument, since it would be more
> > performant here and also would potentially allow for restoring the 2M
> > direct mapping in some cases. Will look into this more for v8.
> 
> set_pages_direct_map_default_noflush()
>
> I guess.
> 
> Although the name is a mouthful so I wouldn't mind having those
> shortened.

I had a patch that just adds numpages parameter:

https://lore.kernel.org/lkml/20201123095432.5860-4-rppt@kernel.org/

The set_direct_map*() are not too widely used, so it's not a big deal to
update all callers.
 
> In any case, as long as that helper is properly defined and documented,
> I don't mind.
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
>
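
For reference, the shape of the interface being suggested here, sketched from the linked patch (the exact prototypes are an assumption, not a quote of that patch):

int set_direct_map_invalid_noflush(struct page *page, int numpages);
int set_direct_map_default_noflush(struct page *page, int numpages);

/* With a numpages argument, the per-page loops in this patch reduce to single calls: */
static int restore_direct_map(u64 pfn, int npages)
{
	return set_direct_map_default_noflush(pfn_to_page(pfn), npages);
}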
Ashish Kalra Jan. 5, 2023, 9:46 p.m. UTC | #11
Hello Boris,

On 12/29/2022 11:09 AM, Borislav Petkov wrote:
> On Tue, Dec 27, 2022 at 03:49:39PM -0600, Kalra, Ashish wrote:
>> Milan onward,
> 
> And before ML there's no SNP, right?
> 

Yes, that's correct.

>> there is H/W support for coherency between mappings of the
>> same physical page with different encryption keys, so AFAIK, there should be
>> no need to flush during page state transitions, where we invoke these direct
>> map interface functions for re-mapping/invalidating pages.
> 
> Yah, that rings a bell.
> 
> In any case, the fact that flushing is not needed should be stated
> somewhere in text so that it is clear why.
> 
>> I don't know if there is any other reason to flush after modifying
>> the direct map ?
> 
> There's
> 
>          /*
>           * No need to flush, when we did not set any of the caching
>           * attributes:
>           */
>          cache = !!pgprot2cachemode(mask_set);
> 
> 
> Does the above HW cover this case too?

Actually, since both set_memory_p() and set_memory_np() only set/clear the
_PAGE_PRESENT flag and do not change any of the page caching attributes,
this flush won't be required anyway.

Thanks,
Ashish

> 
> Thx.
>
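
For context, the x86 implementation behind set_memory_np() at the time of this thread is roughly the following; because only _PAGE_PRESENT is cleared and no caching attribute is touched, the cache flush in change_page_attr_set_clr() is skipped and only the TLB flush remains:

int set_memory_np(unsigned long addr, int numpages)
{
	/* Clears _PAGE_PRESENT only; no caching attribute changes, hence no cache flush. */
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}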
Marc Orr Jan. 5, 2023, 10:08 p.m. UTC | #12
On Tue, Dec 27, 2022 at 1:49 PM Kalra, Ashish <ashish.kalra@amd.com> wrote:
>
> Hello Boris,
>
> On 12/19/2022 2:08 PM, Borislav Petkov wrote:
> > On Mon, Dec 19, 2022 at 09:00:26AM -0600, Michael Roth wrote:
> >> We implemented this approach for v7, but it causes a fairly significant
> >> performance regression, particularly for the case for npages > 1 which
> >> this change was meant to optimize.
> >>
> >> I still need to dig in a bit but I'm guessing it's related to flushing
> >> behavior.
> >
> > Well, AFAICT, change_page_attr_set_clr() flushes once at the end.
> >
> > Don't you need to flush when you modify the direct map?
> >
>
> Milan onward, there is H/W support for coherency between mappings of the
> same physical page with different encryption keys, so AFAIK, there
> should be no need to flush during page state transitions, where we
> invoke these direct map interface functions for re-mapping/invalidating
> pages.
>
> I don't know if there is any other reason to flush after modifying
> the direct map ?

Isn't the Milan coherence feature (SME_COHERENT?) about the caches --
not the TLBs? And isn't the flushing being discussed here about the
TLBs?

Also, I thought that Mingwei Zhang <mizhang@google.com> found that the
Milan SEV coherence feature was basically unusable in Linux because it
only works across CPUs. It does not extend to IO (e.g., CPU caches
need to be flushed prior to free'ing a SEV VM's private address and
reallocating that location to a device driver to be used for IO). My
understanding of this feature and its limitations may be too coarse.
But I think we should be very careful about relying on this feature as
it is implemented in Milan.

That being said, I guess I could see an argument to rely on the
feature here, since we're not deallocating the memory and reallocating
it to a device. But again, I thought the feature was about cache
coherence -- not TLB coherence.
Marc Orr Jan. 5, 2023, 10:31 p.m. UTC | #13
On Thu, Jan 5, 2023 at 2:27 PM Kalra, Ashish <ashish.kalra@amd.com> wrote:
>
> Hello Marc,
>
> On 1/5/2023 4:08 PM, Marc Orr wrote:
> > On Tue, Dec 27, 2022 at 1:49 PM Kalra, Ashish <ashish.kalra@amd.com> wrote:
> >>
> >> Hello Boris,
> >>
> >> On 12/19/2022 2:08 PM, Borislav Petkov wrote:
> >>> On Mon, Dec 19, 2022 at 09:00:26AM -0600, Michael Roth wrote:
> >>>> We implemented this approach for v7, but it causes a fairly significant
> >>>> performance regression, particularly for the case for npages > 1 which
> >>>> this change was meant to optimize.
> >>>>
> >>>> I still need to dig in a bit but I'm guessing it's related to flushing
> >>>> behavior.
> >>>
> >>> Well, AFAICT, change_page_attr_set_clr() flushes once at the end.
> >>>
> >>> Don't you need to flush when you modify the direct map?
> >>>
> >>
> >> Milan onward, there is H/W support for coherency between mappings of the
> >> same physical page with different encryption keys, so AFAIK, there
> >> should be no need to flush during page state transitions, where we
> >> invoke these direct map interface functions for re-mapping/invalidating
> >> pages.
> >>
> >> I don't know if there is any other reason to flush after modifying
> >> the direct map ?
> >
> > Isn't the Milan coherence feature (SME_COHERENT?) about the caches --
> > not the TLBs? And isn't the flushing being discussed here about the
> > TLBs?
>
> Actually, the flush does both cache and TLB flushing.
>
> Both cpa_flush() and cpa_flush_all() will also do cache flushing if
> cache argument is not NULL. As in this case, no page caching attributes
> are being changed so there is no need to do cache flushing.
>
> But TLB flushing (as PTE is updated) is still required and will be done.

Ah, got it now. Thanks for explaining. (I should've looked at the code
a bit closer.)

> > Also, I thought that Mingwei Zhang <mizhang@google.com> found that the
> > Milan SEV coherence feature was basically unusable in Linux because it
> > only works across CPUs. It does not extend to IO (e.g., CPU caches
> > need to be flushed prior to free'ing a SEV VM's private address and
> > reallocating that location to a device driver to be used for IO). My
> > understanding of this feature and its limitations may be too coarse.
> > But I think we should be very careful about relying on this feature as
> > it is implemented in Milan.
> >
> > That being said, I guess I could see an argument to rely on the
> > feature here, since we're not deallocating the memory and reallocating
> > it to a device. But again, I thought the feature was about cache
> > coherence -- not TLB coherence.
>
> Yes, this is just invalidating or re-mapping into the kernel direct map,
> so we can rely on this feature for the use case here.

SGTM and that does make sense then. Thanks for confirming.
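
Taken together, the review feedback above would reshape rmpupdate() roughly as follows. This is an illustrative sketch only, assuming range-based invalidate_direct_map()/restore_direct_map() helpers as sketched earlier in the thread; it is not the code that was posted or merged:

static int rmpupdate(u64 pfn, struct rmpupdate *val)
{
	unsigned long paddr = pfn << PAGE_SHIFT;
	int ret, level, npages;

	if (!pfn_valid(pfn))
		return -EINVAL;

	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
		return -ENXIO;

	level = RMP_TO_X86_PG_LEVEL(val->pagesize);
	npages = page_level_size(level) / PAGE_SIZE;

	/* Unmap the pages from the direct map before assigning them in the RMP table. */
	if (val->assigned && invalidate_direct_map(pfn, npages)) {
		pr_err("Failed to unmap %d pages at pfn 0x%llx from the direct map\n",
		       npages, pfn);
		return -EFAULT;
	}

	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
		     : "=a"(ret)
		     : "a"(paddr), "c"((unsigned long)val)
		     : "memory", "cc");

	/* Restore the direct map once the pages are no longer assigned in the RMP table. */
	if (!ret && !val->assigned && restore_direct_map(pfn, npages)) {
		pr_err("Failed to map %d pages at pfn 0x%llx into the direct map\n",
		       npages, pfn);
		return -EFAULT;
	}

	return ret;
}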

Patch

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index f6c64a722e94..734cddd837f5 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2451,10 +2451,42 @@  int psmash(u64 pfn)
 }
 EXPORT_SYMBOL_GPL(psmash);
 
+static int restore_direct_map(u64 pfn, int npages)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < npages; i++) {
+		ret = set_direct_map_default_noflush(pfn_to_page(pfn + i));
+		if (ret)
+			goto cleanup;
+	}
+
+cleanup:
+	WARN(ret > 0, "Failed to restore direct map for pfn 0x%llx\n", pfn + i);
+	return ret;
+}
+
+static int invalid_direct_map(unsigned long pfn, int npages)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < npages; i++) {
+		ret = set_direct_map_invalid_noflush(pfn_to_page(pfn + i));
+		if (ret)
+			goto cleanup;
+	}
+
+	return 0;
+
+cleanup:
+	restore_direct_map(pfn, i);
+	return ret;
+}
+
 static int rmpupdate(u64 pfn, struct rmpupdate *val)
 {
 	unsigned long paddr = pfn << PAGE_SHIFT;
-	int ret;
+	int ret, level, npages;
 
 	if (!pfn_valid(pfn))
 		return -EINVAL;
@@ -2462,11 +2494,38 @@  static int rmpupdate(u64 pfn, struct rmpupdate *val)
 	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
 		return -ENXIO;
 
+	level = RMP_TO_X86_PG_LEVEL(val->pagesize);
+	npages = page_level_size(level) / PAGE_SIZE;
+
+	/*
+	 * If page is getting assigned in the RMP table then unmap it from the
+	 * direct map.
+	 */
+	if (val->assigned) {
+		if (invalid_direct_map(pfn, npages)) {
+			pr_err("Failed to unmap pfn 0x%llx pages %d from direct_map\n",
+			       pfn, npages);
+			return -EFAULT;
+		}
+	}
+
 	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
 	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
 		     : "=a"(ret)
 		     : "a"(paddr), "c"((unsigned long)val)
 		     : "memory", "cc");
+
+	/*
+	 * Restore the direct map after the page is removed from the RMP table.
+	 */
+	if (!ret && !val->assigned) {
+		if (restore_direct_map(pfn, npages)) {
+			pr_err("Failed to map pfn 0x%llx pages %d in direct_map\n",
+			       pfn, npages);
+			return -EFAULT;
+		}
+	}
+
 	return ret;
 }