diff mbox series

[bpf-next,v2,1/2] bpf: use kvmalloc for map values in syscall

Message ID 20210817154556.92901-1-sdf@google.com
State Superseded
Headers show
Series [bpf-next,v2,1/2] bpf: use kvmalloc for map values in syscall | expand

Commit Message

Stanislav Fomichev Aug. 17, 2021, 3:45 p.m. UTC
Use kvmalloc/kvfree for temporary value when manipulating a map via
syscall. kmalloc might not be sufficient for percpu maps where the value
is big (and further multiplied by hundreds of CPUs).

Can be reproduced with netcnt test on qemu with "-smp 255".

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 kernel/bpf/syscall.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

Comments

Daniel Borkmann Aug. 18, 2021, 10:24 p.m. UTC | #1
On 8/17/21 5:45 PM, Stanislav Fomichev wrote:
> Same as previous patch but for the keys. memdup_bpfptr is renamed

> to vmemdup_bpfptr (and converted to kvmalloc).

> 

> Signed-off-by: Stanislav Fomichev <sdf@google.com>

> ---

>   include/linux/bpfptr.h | 12 ++++++++++--

>   kernel/bpf/syscall.c   | 34 +++++++++++++++++-----------------

>   2 files changed, 27 insertions(+), 19 deletions(-)

> 

> diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h

> index 5cdeab497cb3..84eeffb4316a 100644

> --- a/include/linux/bpfptr.h

> +++ b/include/linux/bpfptr.h

> @@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,

>   	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);

>   }

>   

> -static inline void *memdup_bpfptr(bpfptr_t src, size_t len)

> +static inline void *vmemdup_bpfptr(bpfptr_t src, size_t len)


nit: should we just name it kvmemdup_bpfptr() in that case?

>   {

> -	return memdup_sockptr((sockptr_t) src, len);

> +	void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);

> +

> +	if (!p)

> +		return ERR_PTR(-ENOMEM);

> +	if (copy_from_sockptr(p, (sockptr_t) src, len)) {


Also, I think this one should rather use copy_from_bpfptr() here.

> +		kvfree(p);

> +		return ERR_PTR(-EFAULT);

> +	}

> +	return p;

>   }

>   


Rest lgtm, thanks!
Stanislav Fomichev Aug. 18, 2021, 11:02 p.m. UTC | #2
On 08/19, Daniel Borkmann wrote:
> On 8/17/21 5:45 PM, Stanislav Fomichev wrote:

> > Same as previous patch but for the keys. memdup_bpfptr is renamed

> > to vmemdup_bpfptr (and converted to kvmalloc).

> >

> > Signed-off-by: Stanislav Fomichev <sdf@google.com>

> > ---

> >   include/linux/bpfptr.h | 12 ++++++++++--

> >   kernel/bpf/syscall.c   | 34 +++++++++++++++++-----------------

> >   2 files changed, 27 insertions(+), 19 deletions(-)

> >

> > diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h

> > index 5cdeab497cb3..84eeffb4316a 100644

> > --- a/include/linux/bpfptr.h

> > +++ b/include/linux/bpfptr.h

> > @@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t  

> dst, size_t offset,

> >   	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);

> >   }

> > -static inline void *memdup_bpfptr(bpfptr_t src, size_t len)

> > +static inline void *vmemdup_bpfptr(bpfptr_t src, size_t len)


> nit: should we just name it kvmemdup_bpfptr() in that case?

Sounds good!

> >   {

> > -	return memdup_sockptr((sockptr_t) src, len);

> > +	void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);

> > +

> > +	if (!p)

> > +		return ERR_PTR(-ENOMEM);

> > +	if (copy_from_sockptr(p, (sockptr_t) src, len)) {


> Also, I think this one should rather use copy_from_bpfptr() here.

Ah, missed that one, thanks!

> > +		kvfree(p);

> > +		return ERR_PTR(-EFAULT);

> > +	}

> > +	return p;

> >   }


> Rest lgtm, thanks!
diff mbox series

Patch

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7420e1334ab2..075f650d297a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1076,7 +1076,7 @@  static int map_lookup_elem(union bpf_attr *attr)
 	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1091,7 +1091,7 @@  static int map_lookup_elem(union bpf_attr *attr)
 	err = 0;
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put:
@@ -1137,16 +1137,10 @@  static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-		value_size = round_up(map->value_size, 8) * num_possible_cpus();
-	else
-		value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1157,7 +1151,7 @@  static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 	err = bpf_map_update_value(map, f, key, value, attr->flags);
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put:
@@ -1367,7 +1361,7 @@  int generic_map_update_batch(struct bpf_map *map,
 	if (!key)
 		return -ENOMEM;
 
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value) {
 		kfree(key);
 		return -ENOMEM;
@@ -1390,7 +1384,7 @@  int generic_map_update_batch(struct bpf_map *map,
 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
 		err = -EFAULT;
 
-	kfree(value);
+	kvfree(value);
 	kfree(key);
 	return err;
 }
@@ -1429,7 +1423,7 @@  int generic_map_lookup_batch(struct bpf_map *map,
 	if (!buf_prevkey)
 		return -ENOMEM;
 
-	buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
 	if (!buf) {
 		kfree(buf_prevkey);
 		return -ENOMEM;
@@ -1492,7 +1486,7 @@  int generic_map_lookup_batch(struct bpf_map *map,
 
 free_buf:
 	kfree(buf_prevkey);
-	kfree(buf);
+	kvfree(buf);
 	return err;
 }
 
@@ -1547,7 +1541,7 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1579,7 +1573,7 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	err = 0;
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put: