diff mbox series

[v4,13/25] x86: mp: Allow running functions on multiple CPUs

Message ID 20200707193423.v4.13.I15c8366d7a11d1eeea57e5ac4d0471a95725ce9e@changeid
State Accepted
Commit 84d3ed125ad91c5973c4a071be5eea913bea34e5
Headers show
Series x86: Enhance MTRR functionality to support multiple CPUs | expand

Commit Message

Simon Glass July 8, 2020, 1:34 a.m. UTC
Add a way to run a function on a selection of CPUs. This supports either
a single CPU, all CPUs, just the main CPU or just the 'APs', in Intel
terminology.

It works by writing into a mailbox and then waiting for the CPUs to notice
it, take action and indicate they are done.

When SMP is not yet enabled, this just calls the function on the main CPU.

Signed-off-by: Simon Glass <sjg at chromium.org>
Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com>
---

Changes in v4:
- Only enable this feature of CONFIG_SMP_AP_WORK is enabled
- Allow running on the BSP if SMP is not enabled

Changes in v3:
- Add a comment to run_ap_work()
- Rename flag to GD_FLG_SMP_READY
- Update the comment for run_ap_work() to explain logical_cpu_number
- Clarify meaning of @cpu_select in mp_run_on_cpus() comment

 arch/x86/cpu/mp_init.c    | 107 +++++++++++++++++++++++++++++++++++---
 arch/x86/include/asm/mp.h |  33 ++++++++++++
 2 files changed, 134 insertions(+), 6 deletions(-)

Comments

Bin Meng July 13, 2020, 4:56 a.m. UTC | #1
Hi Simon,

On Wed, Jul 8, 2020 at 9:37 AM Simon Glass <sjg at chromium.org> wrote:
>
> Add a way to run a function on a selection of CPUs. This supports either
> a single CPU, all CPUs, just the main CPU or just the 'APs', in Intel
> terminology.
>
> It works by writing into a mailbox and then waiting for the CPUs to notice
> it, take action and indicate they are done.
>
> When SMP is not yet enabled, this just calls the function on the main CPU.
>
> Signed-off-by: Simon Glass <sjg at chromium.org>
> Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com>
> ---
>
> Changes in v4:
> - Only enable this feature of CONFIG_SMP_AP_WORK is enabled
> - Allow running on the BSP if SMP is not enabled
>
> Changes in v3:
> - Add a comment to run_ap_work()
> - Rename flag to GD_FLG_SMP_READY
> - Update the comment for run_ap_work() to explain logical_cpu_number
> - Clarify meaning of @cpu_select in mp_run_on_cpus() comment
>
>  arch/x86/cpu/mp_init.c    | 107 +++++++++++++++++++++++++++++++++++---
>  arch/x86/include/asm/mp.h |  33 ++++++++++++
>  2 files changed, 134 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> index 69a23829b9..dd6d6bfab7 100644
> --- a/arch/x86/cpu/mp_init.c
> +++ b/arch/x86/cpu/mp_init.c
> @@ -54,12 +54,7 @@ struct mp_flight_plan {
>   *     callback
>   */
>  struct mp_callback {
> -       /**
> -        * func() - Function to call on the AP
> -        *
> -        * @arg: Argument to pass
> -        */
> -       void (*func)(void *arg);
> +       mp_run_func func;
>         void *arg;
>         int logical_cpu_number;
>  };
> @@ -514,6 +509,70 @@ static void store_callback(struct mp_callback **slot, struct mp_callback *val)
>         dmb();
>  }
>
> +/**
> + * run_ap_work() - Run a callback on selected APs
> + *
> + * This writes @callback to all APs and waits for them all to acknowledge it,
> + * Note that whether each AP actually calls the callback depends on the value
> + * of logical_cpu_number (see struct mp_callback). The logical CPU number is
> + * the CPU device's req->seq value.
> + *
> + * @callback: Callback information to pass to all APs
> + * @bsp: CPU device for the BSP
> + * @num_cpus: The number of CPUs in the system (= number of APs + 1)
> + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for
> + *     no timeout
> + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time
> + */
> +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp,
> +                      int num_cpus, uint expire_ms)
> +{
> +       int cur_cpu = bsp->req_seq;
> +       int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */
> +       int cpus_accepted;
> +       ulong start;
> +       int i;
> +
> +       if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) {
> +               printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n");
> +               return -ENOTSUPP;
> +       }
> +
> +       /* Signal to all the APs to run the func. */
> +       for (i = 0; i < num_cpus; i++) {
> +               if (cur_cpu != i)
> +                       store_callback(&ap_callbacks[i], callback);
> +       }
> +       mfence();
> +
> +       /* Wait for all the APs to signal back that call has been accepted. */
> +       start = get_timer(0);
> +
> +       do {
> +               mdelay(1);
> +               cpus_accepted = 0;
> +
> +               for (i = 0; i < num_cpus; i++) {
> +                       if (cur_cpu == i)
> +                               continue;
> +                       if (!read_callback(&ap_callbacks[i]))
> +                               cpus_accepted++;

It looks my previous comments were not addressed. I believe there is a
bug here that cpus_accepted will double count for the 2nd time in the
do {} while () loop.

> +               }
> +
> +               if (expire_ms && get_timer(start) >= expire_ms) {
> +                       log(UCLASS_CPU, LOGL_CRIT,
> +                           "AP call expired; %d/%d CPUs accepted\n",
> +                           cpus_accepted, num_aps);
> +                       return -ETIMEDOUT;
> +               }
> +       } while (cpus_accepted != num_aps);
> +
> +       /* Make sure we can see any data written by the APs */
> +       mfence();
> +
> +       return 0;
> +}
> +
>  /**
>   * ap_wait_for_instruction() - Wait for and process requests from the main CPU
>   *
> @@ -573,6 +632,42 @@ static struct mp_flight_record mp_steps[] = {
>         MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
>  };
>
> +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg)
> +{
> +       struct mp_callback lcb = {
> +               .func = func,
> +               .arg = arg,
> +               .logical_cpu_number = cpu_select,
> +       };
> +       struct udevice *dev;
> +       int num_cpus;
> +       int ret;
> +
> +       ret = get_bsp(&dev, &num_cpus);
> +       if (ret < 0)
> +               return log_msg_ret("bsp", ret);
> +       if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP ||
> +           cpu_select == ret) {
> +               /* Run on BSP first */
> +               func(arg);
> +       }
> +
> +       if (!IS_ENABLED(CONFIG_SMP_AP_WORK) ||
> +           !(gd->flags & GD_FLG_SMP_READY)) {
> +               /* Allow use of this function on the BSP only */
> +               if (cpu_select == MP_SELECT_BSP || !cpu_select)
> +                       return 0;
> +               return -ENOTSUPP;
> +       }
> +
> +       /* Allow up to 1 second for all APs to finish */
> +       ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */);
> +       if (ret)
> +               return log_msg_ret("aps", ret);
> +
> +       return 0;
> +}
> +
>  int mp_init(void)
>  {
>         int num_aps, num_cpus;
> diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
> index 41b1575f4b..eb49e690f2 100644
> --- a/arch/x86/include/asm/mp.h
> +++ b/arch/x86/include/asm/mp.h
> @@ -86,4 +86,37 @@ int mp_init(void);
>  /* Set up additional CPUs */
>  int x86_mp_init(void);
>
> +/**
> + * mp_run_func() - Function to call on the AP
> + *
> + * @arg: Argument to pass
> + */
> +typedef void (*mp_run_func)(void *arg);
> +
> +#if defined(CONFIG_SMP) && !CONFIG_IS_ENABLED(X86_64)
> +/**
> + * mp_run_on_cpus() - Run a function on one or all CPUs
> + *
> + * This does not return until all CPUs have completed the work
> + *
> + * Running on anything other than the boot CPU is only supported if
> + * CONFIG_SMP_AP_WORK is enabled
> + *
> + * @cpu_select: CPU to run on (its dev->req_seq value), or MP_SELECT_ALL for
> + *     all, or MP_SELECT_BSP for BSP
> + * @func: Function to run
> + * @arg: Argument to pass to the function
> + * @return 0 on success, -ve on error
> + */
> +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg);
> +#else
> +static inline int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg)
> +{
> +       /* There is only one CPU, so just call the function here */
> +       func(arg);
> +
> +       return 0;
> +}
> +#endif
> +
>  #endif /* _X86_MP_H_ */

Regards,
Bin
Simon Glass July 13, 2020, 8:57 p.m. UTC | #2
Hi Bin,

On Sun, 12 Jul 2020 at 22:56, Bin Meng <bmeng.cn at gmail.com> wrote:
>
> Hi Simon,
>
> On Wed, Jul 8, 2020 at 9:37 AM Simon Glass <sjg at chromium.org> wrote:
> >
> > Add a way to run a function on a selection of CPUs. This supports either
> > a single CPU, all CPUs, just the main CPU or just the 'APs', in Intel
> > terminology.
> >
> > It works by writing into a mailbox and then waiting for the CPUs to notice
> > it, take action and indicate they are done.
> >
> > When SMP is not yet enabled, this just calls the function on the main CPU.
> >
> > Signed-off-by: Simon Glass <sjg at chromium.org>
> > Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com>
> > ---
> >
> > Changes in v4:
> > - Only enable this feature of CONFIG_SMP_AP_WORK is enabled
> > - Allow running on the BSP if SMP is not enabled
> >
> > Changes in v3:
> > - Add a comment to run_ap_work()
> > - Rename flag to GD_FLG_SMP_READY
> > - Update the comment for run_ap_work() to explain logical_cpu_number
> > - Clarify meaning of @cpu_select in mp_run_on_cpus() comment
> >
> >  arch/x86/cpu/mp_init.c    | 107 +++++++++++++++++++++++++++++++++++---
> >  arch/x86/include/asm/mp.h |  33 ++++++++++++
> >  2 files changed, 134 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> > index 69a23829b9..dd6d6bfab7 100644
> > --- a/arch/x86/cpu/mp_init.c
> > +++ b/arch/x86/cpu/mp_init.c
> > @@ -54,12 +54,7 @@ struct mp_flight_plan {
> >   *     callback
> >   */
> >  struct mp_callback {
> > -       /**
> > -        * func() - Function to call on the AP
> > -        *
> > -        * @arg: Argument to pass
> > -        */
> > -       void (*func)(void *arg);
> > +       mp_run_func func;
> >         void *arg;
> >         int logical_cpu_number;
> >  };
> > @@ -514,6 +509,70 @@ static void store_callback(struct mp_callback **slot, struct mp_callback *val)
> >         dmb();
> >  }
> >
> > +/**
> > + * run_ap_work() - Run a callback on selected APs
> > + *
> > + * This writes @callback to all APs and waits for them all to acknowledge it,
> > + * Note that whether each AP actually calls the callback depends on the value
> > + * of logical_cpu_number (see struct mp_callback). The logical CPU number is
> > + * the CPU device's req->seq value.
> > + *
> > + * @callback: Callback information to pass to all APs
> > + * @bsp: CPU device for the BSP
> > + * @num_cpus: The number of CPUs in the system (= number of APs + 1)
> > + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for
> > + *     no timeout
> > + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time
> > + */
> > +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp,
> > +                      int num_cpus, uint expire_ms)
> > +{
> > +       int cur_cpu = bsp->req_seq;
> > +       int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */
> > +       int cpus_accepted;
> > +       ulong start;
> > +       int i;
> > +
> > +       if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) {
> > +               printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n");
> > +               return -ENOTSUPP;
> > +       }
> > +
> > +       /* Signal to all the APs to run the func. */
> > +       for (i = 0; i < num_cpus; i++) {
> > +               if (cur_cpu != i)
> > +                       store_callback(&ap_callbacks[i], callback);
> > +       }
> > +       mfence();
> > +
> > +       /* Wait for all the APs to signal back that call has been accepted. */
> > +       start = get_timer(0);
> > +
> > +       do {
> > +               mdelay(1);
> > +               cpus_accepted = 0;
> > +
> > +               for (i = 0; i < num_cpus; i++) {
> > +                       if (cur_cpu == i)
> > +                               continue;
> > +                       if (!read_callback(&ap_callbacks[i]))
> > +                               cpus_accepted++;
>
> It looks my previous comments were not addressed. I believe there is a
> bug here that cpus_accepted will double count for the 2nd time in the
> do {} while () loop.

I thought I replied to that. I don't see how that can happen since it
is set to zero each time around the outer loop. Can you please speed
it out?

>
> > +               }
> > +
> > +               if (expire_ms && get_timer(start) >= expire_ms) {
> > +                       log(UCLASS_CPU, LOGL_CRIT,
> > +                           "AP call expired; %d/%d CPUs accepted\n",
> > +                           cpus_accepted, num_aps);
> > +                       return -ETIMEDOUT;
> > +               }
> > +       } while (cpus_accepted != num_aps);
> > +
> > +       /* Make sure we can see any data written by the APs */
> > +       mfence();
> > +
> > +       return 0;
> > +}
> > +

Regards,
Simon
diff mbox series

Patch

diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
index 69a23829b9..dd6d6bfab7 100644
--- a/arch/x86/cpu/mp_init.c
+++ b/arch/x86/cpu/mp_init.c
@@ -54,12 +54,7 @@  struct mp_flight_plan {
  *	callback
  */
 struct mp_callback {
-	/**
-	 * func() - Function to call on the AP
-	 *
-	 * @arg: Argument to pass
-	 */
-	void (*func)(void *arg);
+	mp_run_func func;
 	void *arg;
 	int logical_cpu_number;
 };
@@ -514,6 +509,70 @@  static void store_callback(struct mp_callback **slot, struct mp_callback *val)
 	dmb();
 }
 
+/**
+ * run_ap_work() - Run a callback on selected APs
+ *
+ * This writes @callback to all APs and waits for them all to acknowledge it,
+ * Note that whether each AP actually calls the callback depends on the value
+ * of logical_cpu_number (see struct mp_callback). The logical CPU number is
+ * the CPU device's req->seq value.
+ *
+ * @callback: Callback information to pass to all APs
+ * @bsp: CPU device for the BSP
+ * @num_cpus: The number of CPUs in the system (= number of APs + 1)
+ * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for
+ *	no timeout
+ * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time
+ */
+static int run_ap_work(struct mp_callback *callback, struct udevice *bsp,
+		       int num_cpus, uint expire_ms)
+{
+	int cur_cpu = bsp->req_seq;
+	int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */
+	int cpus_accepted;
+	ulong start;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) {
+		printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n");
+		return -ENOTSUPP;
+	}
+
+	/* Signal to all the APs to run the func. */
+	for (i = 0; i < num_cpus; i++) {
+		if (cur_cpu != i)
+			store_callback(&ap_callbacks[i], callback);
+	}
+	mfence();
+
+	/* Wait for all the APs to signal back that call has been accepted. */
+	start = get_timer(0);
+
+	do {
+		mdelay(1);
+		cpus_accepted = 0;
+
+		for (i = 0; i < num_cpus; i++) {
+			if (cur_cpu == i)
+				continue;
+			if (!read_callback(&ap_callbacks[i]))
+				cpus_accepted++;
+		}
+
+		if (expire_ms && get_timer(start) >= expire_ms) {
+			log(UCLASS_CPU, LOGL_CRIT,
+			    "AP call expired; %d/%d CPUs accepted\n",
+			    cpus_accepted, num_aps);
+			return -ETIMEDOUT;
+		}
+	} while (cpus_accepted != num_aps);
+
+	/* Make sure we can see any data written by the APs */
+	mfence();
+
+	return 0;
+}
+
 /**
  * ap_wait_for_instruction() - Wait for and process requests from the main CPU
  *
@@ -573,6 +632,42 @@  static struct mp_flight_record mp_steps[] = {
 	MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
 };
 
+int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg)
+{
+	struct mp_callback lcb = {
+		.func = func,
+		.arg = arg,
+		.logical_cpu_number = cpu_select,
+	};
+	struct udevice *dev;
+	int num_cpus;
+	int ret;
+
+	ret = get_bsp(&dev, &num_cpus);
+	if (ret < 0)
+		return log_msg_ret("bsp", ret);
+	if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP ||
+	    cpu_select == ret) {
+		/* Run on BSP first */
+		func(arg);
+	}
+
+	if (!IS_ENABLED(CONFIG_SMP_AP_WORK) ||
+	    !(gd->flags & GD_FLG_SMP_READY)) {
+		/* Allow use of this function on the BSP only */
+		if (cpu_select == MP_SELECT_BSP || !cpu_select)
+			return 0;
+		return -ENOTSUPP;
+	}
+
+	/* Allow up to 1 second for all APs to finish */
+	ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */);
+	if (ret)
+		return log_msg_ret("aps", ret);
+
+	return 0;
+}
+
 int mp_init(void)
 {
 	int num_aps, num_cpus;
diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
index 41b1575f4b..eb49e690f2 100644
--- a/arch/x86/include/asm/mp.h
+++ b/arch/x86/include/asm/mp.h
@@ -86,4 +86,37 @@  int mp_init(void);
 /* Set up additional CPUs */
 int x86_mp_init(void);
 
+/**
+ * mp_run_func() - Function to call on the AP
+ *
+ * @arg: Argument to pass
+ */
+typedef void (*mp_run_func)(void *arg);
+
+#if defined(CONFIG_SMP) && !CONFIG_IS_ENABLED(X86_64)
+/**
+ * mp_run_on_cpus() - Run a function on one or all CPUs
+ *
+ * This does not return until all CPUs have completed the work
+ *
+ * Running on anything other than the boot CPU is only supported if
+ * CONFIG_SMP_AP_WORK is enabled
+ *
+ * @cpu_select: CPU to run on (its dev->req_seq value), or MP_SELECT_ALL for
+ *	all, or MP_SELECT_BSP for BSP
+ * @func: Function to run
+ * @arg: Argument to pass to the function
+ * @return 0 on success, -ve on error
+ */
+int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg);
+#else
+static inline int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg)
+{
+	/* There is only one CPU, so just call the function here */
+	func(arg);
+
+	return 0;
+}
+#endif
+
 #endif /* _X86_MP_H_ */