diff mbox

[PATCHv4] example:generator:move verbose from worker to control

Message ID 1438938017-6663-1-git-send-email-balakrishna.garapati@linaro.org
State New
Headers show

Commit Message

Balakrishna Garapati Aug. 7, 2015, 9 a.m. UTC
Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org>
---
 Made updates to print recv stats based on mode

 example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 14 deletions(-)

Comments

Maxim Uvarov Aug. 7, 2015, 11:36 a.m. UTC | #1
On 08/07/15 12:00, Balakrishna.Garapati wrote:
> Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org>
> ---
>   Made updates to print recv stats based on mode
>
>   example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++-------
>   1 file changed, 67 insertions(+), 14 deletions(-)
>
> diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c
> index bdee222..5d1c54a 100644
> --- a/example/generator/odp_generator.c
> +++ b/example/generator/odp_generator.c
> @@ -101,6 +101,7 @@ static void usage(char *progname);
>   static int scan_ip(char *buf, unsigned int *paddr);
>   static int scan_mac(char *in, odph_ethaddr_t *des);
>   static void tv_sub(struct timeval *recvtime, struct timeval *sendtime);
> +static void print_global_stats(int num_workers);
>   
>   /**
>    * Sleep for the specified amount of milliseconds
> @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool)
>   static void *gen_send_thread(void *arg)
>   {
>   	int thr;
> -	uint64_t start, now, diff;
>   	odp_pktio_t pktio;
>   	thread_args_t *thr_args;
>   	odp_queue_t outq_def;
> @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg)
>   		return NULL;
>   	}
>   
> -	start = odp_time_cycles();
>   	printf("  [%02i] created mode: SEND\n", thr);
>   	for (;;) {
>   		int err;
> @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg)
>   		    >= (unsigned int)args->appl.number) {
>   			break;
>   		}
> -
> -		now = odp_time_cycles();
> -		diff = odp_time_diff_cycles(start, now);
> -		if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) {
> -			start = odp_time_cycles();
> -			printf("  [%02i] total send: %ju\n",
> -			       thr, odp_atomic_load_u64(&counters.seq));
> -			fflush(stdout);
> -		}
>   	}
>   
>   	/* receive number of reply pks until timeout */
> @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len)
>   			continue;
>   
>   		odp_atomic_inc_u64(&counters.ip);
> -		rlen += sprintf(msg, "receive Packet proto:IP ");
>   		buf = odp_packet_data(pkt);
>   		ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt));
> -		rlen += sprintf(msg + rlen, "id %d ",
> -				odp_be_to_cpu_16(ip->id));
>   		offset = odp_packet_l4_offset(pkt);
>   
>   		/* udp */
>   		if (ip->proto == ODPH_IPPROTO_UDP) {
>   			odp_atomic_inc_u64(&counters.udp);
> +			rlen += sprintf(msg, "receive Packet proto:IP ");
> +			rlen += sprintf(msg + rlen, "id %d ",
> +					odp_be_to_cpu_16(ip->id));
>   			udp = (odph_udphdr_t *)(buf + offset);
>   			rlen += sprintf(msg + rlen, "UDP payload %d ",
>   					odp_be_to_cpu_16(udp->length) -
> @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg)
>   
>   	return arg;
>   }
> +
> +/**
> + * printing verbose statistics
> + *
> + */
> +static void print_global_stats(int num_workers)
> +{
> +	uint64_t start, now, diff;
> +	uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
> +	int verbose_interval = 20, worker_count;
> +	odp_thrmask_t thrd_mask;
> +
> +	start = odp_time_cycles();
> +	while (1) {
> +		now = odp_time_cycles();
> +		diff = odp_time_diff_cycles(start, now);
> +		if (odp_time_cycles_to_ns(diff) <
> +		    verbose_interval * ODP_TIME_SEC) {
> +			continue;
> +		}
> +
> +		start = odp_time_cycles();
> +
> +		worker_count = odp_thrmask_worker(&thrd_mask);

try to not call any odp_api in while 1 loops. Api might be itself slow 
or block
something else, depends on implementation. Here you already calculated 
number of workers
and init. Just reuse that number.
Also just looked to that calculation and there is bug:


     /* Default to system CPU count unless user specified */
     num_workers = MAX_WORKERS;
     if (args->appl.cpu_count)
         num_workers = args->appl.cpu_count;

     /* ping mode need two worker */
     if (args->appl.mode == APPL_MODE_PING)
         num_workers = 2;

     /* Get default worker cpumask */
     num_workers = odp_cpumask_def_worker(&cpumask, num_workers);
     (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr));

MAX_WORKERS is not needed. It has to be just 0. In that case 
odp_cpumask_def_worker will return all available workers.

Maxim.


> +		if (worker_count < num_workers)
> +			break;
> +		if (args->appl.mode == APPL_MODE_PING) {
> +			if (worker_count == num_workers)
> +				break;
> +		}
> +
> +		if (args->appl.mode == APPL_MODE_RCV) {
> +			pkts = odp_atomic_load_u64(&counters.udp);
> +			if (pkts != 0)
> +				printf(" total receive(UDP: %" PRIu64 ")\n",
> +				       pkts);
> +			continue;
> +		}
> +
> +		if (args->appl.mode == APPL_MODE_PING) {
> +			pkts = odp_atomic_load_u64(&counters.icmp);
> +			if (pkts != 0)
> +				printf(" total receive(ICMP: %" PRIu64 ")\n",
> +				       pkts);
> +		}
> +
> +		pkts = odp_atomic_load_u64(&counters.seq);
> +		printf(" total sent: %" PRIu64 "\n", pkts);
> +
> +	if (args->appl.mode == APPL_MODE_UDP) {
> +			pps = (pkts - pkts_prev) / verbose_interval;
> +			if (pps > maximum_pps)
> +				maximum_pps = pps;
> +			printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n",
> +			       pps, maximum_pps);
> +		}
> +
> +		pkts_prev = pkts;
> +	};
> +}
> +
>   /**
>    * ODP packet example main function
>    */
> @@ -796,6 +847,8 @@ int main(int argc, char *argv[])
>   		}
>   	}
>   
> +	print_global_stats(num_workers);
> +
>   	/* Master thread waits for other threads to exit */
>   	odph_linux_pthread_join(thread_tbl, num_workers);
>
Balakrishna Garapati Aug. 7, 2015, 12:23 p.m. UTC | #2
I will look into the API replacement.  If I understood your comment
correctly, I am already reusing the initial calculated num_workers and
trying to compare it to the current workers to see if any thread has been
exited. So that  we can break out from the loop. And this check happens
only after each verbose_timeout, does it still effects the performance?.

And about the  MAX_WORKERS, I kinda fixed the issue in the other patch
"[PATCHv4]
example:generator:option to supply core mask" (
http://patches.opendataplane.org/patch/2561/).

Can you look at that and give me comments which might be relevant patch for
this comment?.

/Krishna

Ma to  supply core mask

On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

> On 08/07/15 12:00, Balakrishna.Garapati wrote:
>
>> Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org>
>> ---
>>   Made updates to print recv stats based on mode
>>
>>   example/generator/odp_generator.c | 81
>> ++++++++++++++++++++++++++++++++-------
>>   1 file changed, 67 insertions(+), 14 deletions(-)
>>
>> diff --git a/example/generator/odp_generator.c
>> b/example/generator/odp_generator.c
>> index bdee222..5d1c54a 100644
>> --- a/example/generator/odp_generator.c
>> +++ b/example/generator/odp_generator.c
>> @@ -101,6 +101,7 @@ static void usage(char *progname);
>>   static int scan_ip(char *buf, unsigned int *paddr);
>>   static int scan_mac(char *in, odph_ethaddr_t *des);
>>   static void tv_sub(struct timeval *recvtime, struct timeval *sendtime);
>> +static void print_global_stats(int num_workers);
>>     /**
>>    * Sleep for the specified amount of milliseconds
>> @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev,
>> odp_pool_t pool)
>>   static void *gen_send_thread(void *arg)
>>   {
>>         int thr;
>> -       uint64_t start, now, diff;
>>         odp_pktio_t pktio;
>>         thread_args_t *thr_args;
>>         odp_queue_t outq_def;
>> @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg)
>>                 return NULL;
>>         }
>>   -     start = odp_time_cycles();
>>         printf("  [%02i] created mode: SEND\n", thr);
>>         for (;;) {
>>                 int err;
>> @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg)
>>                     >= (unsigned int)args->appl.number) {
>>                         break;
>>                 }
>> -
>> -               now = odp_time_cycles();
>> -               diff = odp_time_diff_cycles(start, now);
>> -               if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) {
>> -                       start = odp_time_cycles();
>> -                       printf("  [%02i] total send: %ju\n",
>> -                              thr, odp_atomic_load_u64(&counters.seq));
>> -                       fflush(stdout);
>> -               }
>>         }
>>         /* receive number of reply pks until timeout */
>> @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t
>> pkt_tbl[], unsigned len)
>>                         continue;
>>                 odp_atomic_inc_u64(&counters.ip);
>> -               rlen += sprintf(msg, "receive Packet proto:IP ");
>>                 buf = odp_packet_data(pkt);
>>                 ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt));
>> -               rlen += sprintf(msg + rlen, "id %d ",
>> -                               odp_be_to_cpu_16(ip->id));
>>                 offset = odp_packet_l4_offset(pkt);
>>                 /* udp */
>>                 if (ip->proto == ODPH_IPPROTO_UDP) {
>>                         odp_atomic_inc_u64(&counters.udp);
>> +                       rlen += sprintf(msg, "receive Packet proto:IP ");
>> +                       rlen += sprintf(msg + rlen, "id %d ",
>> +                                       odp_be_to_cpu_16(ip->id));
>>                         udp = (odph_udphdr_t *)(buf + offset);
>>                         rlen += sprintf(msg + rlen, "UDP payload %d ",
>>                                         odp_be_to_cpu_16(udp->length) -
>> @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg)
>>         return arg;
>>   }
>> +
>> +/**
>> + * printing verbose statistics
>> + *
>> + */
>> +static void print_global_stats(int num_workers)
>> +{
>> +       uint64_t start, now, diff;
>> +       uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
>> +       int verbose_interval = 20, worker_count;
>> +       odp_thrmask_t thrd_mask;
>> +
>> +       start = odp_time_cycles();
>> +       while (1) {
>> +               now = odp_time_cycles();
>> +               diff = odp_time_diff_cycles(start, now);
>> +               if (odp_time_cycles_to_ns(diff) <
>> +                   verbose_interval * ODP_TIME_SEC) {
>> +                       continue;
>> +               }
>> +
>> +               start = odp_time_cycles();
>> +
>> +               worker_count = odp_thrmask_worker(&thrd_mask);
>>
>
> try to not call any odp_api in while 1 loops. Api might be itself slow or
> block
> something else, depends on implementation. Here you already calculated
> number of workers
> and init. Just reuse that number.
> Also just looked to that calculation and there is bug:
>
>
>     /* Default to system CPU count unless user specified */
>     num_workers = MAX_WORKERS;
>     if (args->appl.cpu_count)
>         num_workers = args->appl.cpu_count;
>
>     /* ping mode need two worker */
>     if (args->appl.mode == APPL_MODE_PING)
>         num_workers = 2;
>
>     /* Get default worker cpumask */
>     num_workers = odp_cpumask_def_worker(&cpumask, num_workers);
>     (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr));
>
> MAX_WORKERS is not needed. It has to be just 0. In that case
> odp_cpumask_def_worker will return all available workers.
>
> Maxim.
>
>
>
> +               if (worker_count < num_workers)
>> +                       break;
>> +               if (args->appl.mode == APPL_MODE_PING) {
>> +                       if (worker_count == num_workers)
>> +                               break;
>> +               }
>> +
>> +               if (args->appl.mode == APPL_MODE_RCV) {
>> +                       pkts = odp_atomic_load_u64(&counters.udp);
>> +                       if (pkts != 0)
>> +                               printf(" total receive(UDP: %" PRIu64
>> ")\n",
>> +                                      pkts);
>> +                       continue;
>> +               }
>> +
>> +               if (args->appl.mode == APPL_MODE_PING) {
>> +                       pkts = odp_atomic_load_u64(&counters.icmp);
>> +                       if (pkts != 0)
>> +                               printf(" total receive(ICMP: %" PRIu64
>> ")\n",
>> +                                      pkts);
>> +               }
>> +
>> +               pkts = odp_atomic_load_u64(&counters.seq);
>> +               printf(" total sent: %" PRIu64 "\n", pkts);
>> +
>> +       if (args->appl.mode == APPL_MODE_UDP) {
>> +                       pps = (pkts - pkts_prev) / verbose_interval;
>> +                       if (pps > maximum_pps)
>> +                               maximum_pps = pps;
>> +                       printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n",
>> +                              pps, maximum_pps);
>> +               }
>> +
>> +               pkts_prev = pkts;
>> +       };
>> +}
>> +
>>   /**
>>    * ODP packet example main function
>>    */
>> @@ -796,6 +847,8 @@ int main(int argc, char *argv[])
>>                 }
>>         }
>>   +     print_global_stats(num_workers);
>> +
>>         /* Master thread waits for other threads to exit */
>>         odph_linux_pthread_join(thread_tbl, num_workers);
>>
>>
>
>
Maxim Uvarov Aug. 7, 2015, 1:37 p.m. UTC | #3
On 08/07/15 15:23, Krishna Garapati wrote:
> I will look into the API replacement.  If I understood your comment 
> correctly, I am already reusing the initial calculated num_workers and 
> trying to compare it to the current workers to see if any thread has 
> been exited. So that  we can break out from the loop. And this check 
> happens only after each verbose_timeout, does it still effects the 
> performance?.

Ok, I did not release first that you use  odp_thrmask_worker() to check 
how many worker threads run now. I double checked with Bill and Stuart 
that we support dynamic workers and that function can be used in that way.



>
> And about the MAX_WORKERS, I kinda fixed the issue in the other patch 
> "[PATCHv4] example:generator:option to supply core mask" 
> (http://patches.opendataplane.org/patch/2561/).
>
> Can you look at that and give me comments which might be relevant 
> patch for this comment?.
>
Ok.

Maxim.
> /Krishna
>
> Ma to  supply core mask
>
> On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org 
> <mailto:maxim.uvarov@linaro.org>> wrote:
>
>     On 08/07/15 12:00, Balakrishna.Garapati wrote:
>
>         Signed-off-by: Balakrishna.Garapati
>         <balakrishna.garapati@linaro.org
>         <mailto:balakrishna.garapati@linaro.org>>
>         ---
>           Made updates to print recv stats based on mode
>
>           example/generator/odp_generator.c | 81
>         ++++++++++++++++++++++++++++++++-------
>           1 file changed, 67 insertions(+), 14 deletions(-)
>
>         diff --git a/example/generator/odp_generator.c
>         b/example/generator/odp_generator.c
>         index bdee222..5d1c54a 100644
>         --- a/example/generator/odp_generator.c
>         +++ b/example/generator/odp_generator.c
>         @@ -101,6 +101,7 @@ static void usage(char *progname);
>           static int scan_ip(char *buf, unsigned int *paddr);
>           static int scan_mac(char *in, odph_ethaddr_t *des);
>           static void tv_sub(struct timeval *recvtime, struct timeval
>         *sendtime);
>         +static void print_global_stats(int num_workers);
>             /**
>            * Sleep for the specified amount of milliseconds
>         @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char
>         *dev, odp_pool_t pool)
>           static void *gen_send_thread(void *arg)
>           {
>                 int thr;
>         -       uint64_t start, now, diff;
>                 odp_pktio_t pktio;
>                 thread_args_t *thr_args;
>                 odp_queue_t outq_def;
>         @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg)
>                         return NULL;
>                 }
>           -     start = odp_time_cycles();
>                 printf("  [%02i] created mode: SEND\n", thr);
>                 for (;;) {
>                         int err;
>         @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg)
>                             >= (unsigned int)args->appl.number) {
>                                 break;
>                         }
>         -
>         -               now = odp_time_cycles();
>         -               diff = odp_time_diff_cycles(start, now);
>         -               if (odp_time_cycles_to_ns(diff) > 20 *
>         ODP_TIME_SEC) {
>         -                       start = odp_time_cycles();
>         -                       printf("  [%02i] total send: %ju\n",
>         -                              thr,
>         odp_atomic_load_u64(&counters.seq));
>         -                       fflush(stdout);
>         -               }
>                 }
>                 /* receive number of reply pks until timeout */
>         @@ -502,16 +492,16 @@ static void print_pkts(int thr,
>         odp_packet_t pkt_tbl[], unsigned len)
>                                 continue;
>                         odp_atomic_inc_u64(&counters.ip);
>         -               rlen += sprintf(msg, "receive Packet proto:IP ");
>                         buf = odp_packet_data(pkt);
>                         ip = (odph_ipv4hdr_t *)(buf +
>         odp_packet_l3_offset(pkt));
>         -               rlen += sprintf(msg + rlen, "id %d ",
>         -  odp_be_to_cpu_16(ip->id));
>                         offset = odp_packet_l4_offset(pkt);
>                         /* udp */
>                         if (ip->proto == ODPH_IPPROTO_UDP) {
>         odp_atomic_inc_u64(&counters.udp);
>         +                       rlen += sprintf(msg, "receive Packet
>         proto:IP ");
>         +                       rlen += sprintf(msg + rlen, "id %d ",
>         +  odp_be_to_cpu_16(ip->id));
>                                 udp = (odph_udphdr_t *)(buf + offset);
>                                 rlen += sprintf(msg + rlen, "UDP
>         payload %d ",
>         odp_be_to_cpu_16(udp->length) -
>         @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg)
>                 return arg;
>           }
>         +
>         +/**
>         + * printing verbose statistics
>         + *
>         + */
>         +static void print_global_stats(int num_workers)
>         +{
>         +       uint64_t start, now, diff;
>         +       uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
>         +       int verbose_interval = 20, worker_count;
>         +       odp_thrmask_t thrd_mask;
>         +
>         +       start = odp_time_cycles();
>         +       while (1) {
>         +               now = odp_time_cycles();
>         +               diff = odp_time_diff_cycles(start, now);
>         +               if (odp_time_cycles_to_ns(diff) <
>         +                   verbose_interval * ODP_TIME_SEC) {
>         +                       continue;
>         +               }
>         +
>         +               start = odp_time_cycles();
>         +
>         +               worker_count = odp_thrmask_worker(&thrd_mask);
>
>
>     try to not call any odp_api in while 1 loops. Api might be itself
>     slow or block
>     something else, depends on implementation. Here you already
>     calculated number of workers
>     and init. Just reuse that number.
>     Also just looked to that calculation and there is bug:
>
>
>         /* Default to system CPU count unless user specified */
>         num_workers = MAX_WORKERS;
>         if (args->appl.cpu_count)
>             num_workers = args->appl.cpu_count;
>
>         /* ping mode need two worker */
>         if (args->appl.mode == APPL_MODE_PING)
>             num_workers = 2;
>
>         /* Get default worker cpumask */
>         num_workers = odp_cpumask_def_worker(&cpumask, num_workers);
>         (void)odp_cpumask_to_str(&cpumask, cpumaskstr,
>     sizeof(cpumaskstr));
>
>     MAX_WORKERS is not needed. It has to be just 0. In that case
>     odp_cpumask_def_worker will return all available workers.
>
>     Maxim.
>
>
>
>         +               if (worker_count < num_workers)
>         +                       break;
>         +               if (args->appl.mode == APPL_MODE_PING) {
>         +                       if (worker_count == num_workers)
>         +                               break;
>         +               }
>         +
>         +               if (args->appl.mode == APPL_MODE_RCV) {
>         +                       pkts = odp_atomic_load_u64(&counters.udp);
>         +                       if (pkts != 0)
>         +                               printf(" total receive(UDP: %"
>         PRIu64 ")\n",
>         +                                      pkts);
>         +                       continue;
>         +               }
>         +
>         +               if (args->appl.mode == APPL_MODE_PING) {
>         +                       pkts =
>         odp_atomic_load_u64(&counters.icmp);
>         +                       if (pkts != 0)
>         +                               printf(" total receive(ICMP:
>         %" PRIu64 ")\n",
>         +                                      pkts);
>         +               }
>         +
>         +               pkts = odp_atomic_load_u64(&counters.seq);
>         +               printf(" total sent: %" PRIu64 "\n", pkts);
>         +
>         +       if (args->appl.mode == APPL_MODE_UDP) {
>         +                       pps = (pkts - pkts_prev) /
>         verbose_interval;
>         +                       if (pps > maximum_pps)
>         +                               maximum_pps = pps;
>         +                       printf(" %" PRIu64 " pps, %" PRIu64 "
>         max pps\n",
>         +                              pps, maximum_pps);
>         +               }
>         +
>         +               pkts_prev = pkts;
>         +       };
>         +}
>         +
>           /**
>            * ODP packet example main function
>            */
>         @@ -796,6 +847,8 @@ int main(int argc, char *argv[])
>                         }
>                 }
>           +     print_global_stats(num_workers);
>         +
>                 /* Master thread waits for other threads to exit */
>                 odph_linux_pthread_join(thread_tbl, num_workers);
>
>
>
Balakrishna Garapati Aug. 13, 2015, 11:34 a.m. UTC | #4
Stuart, does this patch look ok ?

On 7 August 2015 at 15:37, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

> On 08/07/15 15:23, Krishna Garapati wrote:
>
>> I will look into the API replacement.  If I understood your comment
>> correctly, I am already reusing the initial calculated num_workers and
>> trying to compare it to the current workers to see if any thread has been
>> exited. So that  we can break out from the loop. And this check happens
>> only after each verbose_timeout, does it still effects the performance?.
>>
>
> Ok, I did not release first that you use  odp_thrmask_worker() to check
> how many worker threads run now. I double checked with Bill and Stuart that
> we support dynamic workers and that function can be used in that way.
>
>
>
>
>> And about the MAX_WORKERS, I kinda fixed the issue in the other patch
>> "[PATCHv4] example:generator:option to supply core mask" (
>> http://patches.opendataplane.org/patch/2561/).
>>
>> Can you look at that and give me comments which might be relevant patch
>> for this comment?.
>>
>> Ok.
>
> Maxim.
>
>> /Krishna
>>
>> Ma to  supply core mask
>>
>> On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org <mailto:
>> maxim.uvarov@linaro.org>> wrote:
>>
>>     On 08/07/15 12:00, Balakrishna.Garapati wrote:
>>
>>         Signed-off-by: Balakrishna.Garapati
>>         <balakrishna.garapati@linaro.org
>>         <mailto:balakrishna.garapati@linaro.org>>
>>
>>         ---
>>           Made updates to print recv stats based on mode
>>
>>           example/generator/odp_generator.c | 81
>>         ++++++++++++++++++++++++++++++++-------
>>           1 file changed, 67 insertions(+), 14 deletions(-)
>>
>>         diff --git a/example/generator/odp_generator.c
>>         b/example/generator/odp_generator.c
>>         index bdee222..5d1c54a 100644
>>         --- a/example/generator/odp_generator.c
>>         +++ b/example/generator/odp_generator.c
>>         @@ -101,6 +101,7 @@ static void usage(char *progname);
>>           static int scan_ip(char *buf, unsigned int *paddr);
>>           static int scan_mac(char *in, odph_ethaddr_t *des);
>>           static void tv_sub(struct timeval *recvtime, struct timeval
>>         *sendtime);
>>         +static void print_global_stats(int num_workers);
>>             /**
>>            * Sleep for the specified amount of milliseconds
>>         @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char
>>         *dev, odp_pool_t pool)
>>           static void *gen_send_thread(void *arg)
>>           {
>>                 int thr;
>>         -       uint64_t start, now, diff;
>>                 odp_pktio_t pktio;
>>                 thread_args_t *thr_args;
>>                 odp_queue_t outq_def;
>>         @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg)
>>                         return NULL;
>>                 }
>>           -     start = odp_time_cycles();
>>                 printf("  [%02i] created mode: SEND\n", thr);
>>                 for (;;) {
>>                         int err;
>>         @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg)
>>                             >= (unsigned int)args->appl.number) {
>>                                 break;
>>                         }
>>         -
>>         -               now = odp_time_cycles();
>>         -               diff = odp_time_diff_cycles(start, now);
>>         -               if (odp_time_cycles_to_ns(diff) > 20 *
>>         ODP_TIME_SEC) {
>>         -                       start = odp_time_cycles();
>>         -                       printf("  [%02i] total send: %ju\n",
>>         -                              thr,
>>         odp_atomic_load_u64(&counters.seq));
>>         -                       fflush(stdout);
>>         -               }
>>                 }
>>                 /* receive number of reply pks until timeout */
>>         @@ -502,16 +492,16 @@ static void print_pkts(int thr,
>>         odp_packet_t pkt_tbl[], unsigned len)
>>                                 continue;
>>                         odp_atomic_inc_u64(&counters.ip);
>>         -               rlen += sprintf(msg, "receive Packet proto:IP ");
>>                         buf = odp_packet_data(pkt);
>>                         ip = (odph_ipv4hdr_t *)(buf +
>>         odp_packet_l3_offset(pkt));
>>         -               rlen += sprintf(msg + rlen, "id %d ",
>>         -  odp_be_to_cpu_16(ip->id));
>>                         offset = odp_packet_l4_offset(pkt);
>>                         /* udp */
>>                         if (ip->proto == ODPH_IPPROTO_UDP) {
>>         odp_atomic_inc_u64(&counters.udp);
>>         +                       rlen += sprintf(msg, "receive Packet
>>         proto:IP ");
>>         +                       rlen += sprintf(msg + rlen, "id %d ",
>>         +  odp_be_to_cpu_16(ip->id));
>>                                 udp = (odph_udphdr_t *)(buf + offset);
>>                                 rlen += sprintf(msg + rlen, "UDP
>>         payload %d ",
>>         odp_be_to_cpu_16(udp->length) -
>>         @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg)
>>                 return arg;
>>           }
>>         +
>>         +/**
>>         + * printing verbose statistics
>>         + *
>>         + */
>>         +static void print_global_stats(int num_workers)
>>         +{
>>         +       uint64_t start, now, diff;
>>         +       uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
>>         +       int verbose_interval = 20, worker_count;
>>         +       odp_thrmask_t thrd_mask;
>>         +
>>         +       start = odp_time_cycles();
>>         +       while (1) {
>>         +               now = odp_time_cycles();
>>         +               diff = odp_time_diff_cycles(start, now);
>>         +               if (odp_time_cycles_to_ns(diff) <
>>         +                   verbose_interval * ODP_TIME_SEC) {
>>         +                       continue;
>>         +               }
>>         +
>>         +               start = odp_time_cycles();
>>         +
>>         +               worker_count = odp_thrmask_worker(&thrd_mask);
>>
>>
>>     try to not call any odp_api in while 1 loops. Api might be itself
>>     slow or block
>>     something else, depends on implementation. Here you already
>>     calculated number of workers
>>     and init. Just reuse that number.
>>     Also just looked to that calculation and there is bug:
>>
>>
>>         /* Default to system CPU count unless user specified */
>>         num_workers = MAX_WORKERS;
>>         if (args->appl.cpu_count)
>>             num_workers = args->appl.cpu_count;
>>
>>         /* ping mode need two worker */
>>         if (args->appl.mode == APPL_MODE_PING)
>>             num_workers = 2;
>>
>>         /* Get default worker cpumask */
>>         num_workers = odp_cpumask_def_worker(&cpumask, num_workers);
>>         (void)odp_cpumask_to_str(&cpumask, cpumaskstr,
>>     sizeof(cpumaskstr));
>>
>>     MAX_WORKERS is not needed. It has to be just 0. In that case
>>     odp_cpumask_def_worker will return all available workers.
>>
>>     Maxim.
>>
>>
>>
>>         +               if (worker_count < num_workers)
>>         +                       break;
>>         +               if (args->appl.mode == APPL_MODE_PING) {
>>         +                       if (worker_count == num_workers)
>>         +                               break;
>>         +               }
>>         +
>>         +               if (args->appl.mode == APPL_MODE_RCV) {
>>         +                       pkts = odp_atomic_load_u64(&counters.udp);
>>         +                       if (pkts != 0)
>>         +                               printf(" total receive(UDP: %"
>>         PRIu64 ")\n",
>>         +                                      pkts);
>>         +                       continue;
>>         +               }
>>         +
>>         +               if (args->appl.mode == APPL_MODE_PING) {
>>         +                       pkts =
>>         odp_atomic_load_u64(&counters.icmp);
>>         +                       if (pkts != 0)
>>         +                               printf(" total receive(ICMP:
>>         %" PRIu64 ")\n",
>>         +                                      pkts);
>>         +               }
>>         +
>>         +               pkts = odp_atomic_load_u64(&counters.seq);
>>         +               printf(" total sent: %" PRIu64 "\n", pkts);
>>         +
>>         +       if (args->appl.mode == APPL_MODE_UDP) {
>>         +                       pps = (pkts - pkts_prev) /
>>         verbose_interval;
>>         +                       if (pps > maximum_pps)
>>         +                               maximum_pps = pps;
>>         +                       printf(" %" PRIu64 " pps, %" PRIu64 "
>>         max pps\n",
>>         +                              pps, maximum_pps);
>>         +               }
>>         +
>>         +               pkts_prev = pkts;
>>         +       };
>>         +}
>>         +
>>           /**
>>            * ODP packet example main function
>>            */
>>         @@ -796,6 +847,8 @@ int main(int argc, char *argv[])
>>                         }
>>                 }
>>           +     print_global_stats(num_workers);
>>         +
>>                 /* Master thread waits for other threads to exit */
>>                 odph_linux_pthread_join(thread_tbl, num_workers);
>>
>>
>>
>>
>
Stuart Haslam Aug. 18, 2015, 2:41 p.m. UTC | #5
On Fri, Aug 07, 2015 at 11:00:17AM +0200, Balakrishna.Garapati wrote:
> Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org>
> ---
>  Made updates to print recv stats based on mode
>
>  example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 67 insertions(+), 14 deletions(-)
>
> diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c
> index bdee222..5d1c54a 100644
> --- a/example/generator/odp_generator.c
> +++ b/example/generator/odp_generator.c
> @@ -101,6 +101,7 @@ static void usage(char *progname);
>  static int scan_ip(char *buf, unsigned int *paddr);
>  static int scan_mac(char *in, odph_ethaddr_t *des);
>  static void tv_sub(struct timeval *recvtime, struct timeval *sendtime);
> +static void print_global_stats(int num_workers);
>
>  /**
>   * Sleep for the specified amount of milliseconds
> @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool)
>  static void *gen_send_thread(void *arg)
>  {
>   int thr;
> - uint64_t start, now, diff;
>   odp_pktio_t pktio;
>   thread_args_t *thr_args;
>   odp_queue_t outq_def;
> @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg)
>   return NULL;
>   }
>
> - start = odp_time_cycles();
>   printf("  [%02i] created mode: SEND\n", thr);
>   for (;;) {
>   int err;
> @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg)
>      >= (unsigned int)args->appl.number) {
>   break;
>   }
> -
> - now = odp_time_cycles();
> - diff = odp_time_diff_cycles(start, now);
> - if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) {
> - start = odp_time_cycles();
> - printf("  [%02i] total send: %ju\n",
> -       thr, odp_atomic_load_u64(&counters.seq));
> - fflush(stdout);
> - }
>   }
>
>   /* receive number of reply pks until timeout */
> @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len)
>   continue;
>
>   odp_atomic_inc_u64(&counters.ip);
> - rlen += sprintf(msg, "receive Packet proto:IP ");
>   buf = odp_packet_data(pkt);
>   ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt));
> - rlen += sprintf(msg + rlen, "id %d ",
> - odp_be_to_cpu_16(ip->id));
>   offset = odp_packet_l4_offset(pkt);
>
>   /* udp */
>   if (ip->proto == ODPH_IPPROTO_UDP) {
>   odp_atomic_inc_u64(&counters.udp);
> + rlen += sprintf(msg, "receive Packet proto:IP ");
> + rlen += sprintf(msg + rlen, "id %d ",
> + odp_be_to_cpu_16(ip->id));
>   udp = (odph_udphdr_t *)(buf + offset);
>   rlen += sprintf(msg + rlen, "UDP payload %d ",
>   odp_be_to_cpu_16(udp->length) -

There's still an unconditional print for every received packet.

> @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg)
>
>   return arg;
>  }
> +
> +/**
> + * printing verbose statistics
> + *
> + */
> +static void print_global_stats(int num_workers)
> +{
> + uint64_t start, now, diff;
> + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
> + int verbose_interval = 20, worker_count;
> + odp_thrmask_t thrd_mask;
> +
> + start = odp_time_cycles();
> + while (1) {
> + now = odp_time_cycles();
> + diff = odp_time_diff_cycles(start, now);
> + if (odp_time_cycles_to_ns(diff) <
> +    verbose_interval * ODP_TIME_SEC) {
> + continue;
> + }

There's no exit condition check in this loop above, so you'll wait up to
verbose_interval extra seconds unnecessarily, for example when running
with "-n 100" the workers will finish quickly.

> +
> + start = odp_time_cycles();
> +
> + worker_count = odp_thrmask_worker(&thrd_mask);

There's a potential race here as the worker thread counts get
incremented after the thread has started (rather than during the
odph_linux_pthread_create() call), and it's pretty likely this code
will be reached by the main thread before the workers have started.

Should wait until worker count has reached num_workers before entering
the loop.

> + if (worker_count < num_workers)
> + break;
> + if (args->appl.mode == APPL_MODE_PING) {
> + if (worker_count == num_workers)
> + break;
> + }
> +
> + if (args->appl.mode == APPL_MODE_RCV) {
> + pkts = odp_atomic_load_u64(&counters.udp);
> + if (pkts != 0)

This check isn't needed, I want to see how many packets have been
received even (especially) if it's 0.

> + printf(" total receive(UDP: %" PRIu64 ")\n",
> +       pkts);
> + continue;
> + }
> +
> + if (args->appl.mode == APPL_MODE_PING) {
> + pkts = odp_atomic_load_u64(&counters.icmp);
> + if (pkts != 0)

Same here.

> + printf(" total receive(ICMP: %" PRIu64 ")\n",
> +       pkts);
> + }
> +
> + pkts = odp_atomic_load_u64(&counters.seq);
> + printf(" total sent: %" PRIu64 "\n", pkts);
> +
> + if (args->appl.mode == APPL_MODE_UDP) {

Alignment problem here, need another tab.

> + pps = (pkts - pkts_prev) / verbose_interval;
> + if (pps > maximum_pps)
> + maximum_pps = pps;
> + printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n",
> +       pps, maximum_pps);
> + }
> +
> + pkts_prev = pkts;

This should go inside the condition above.

> + };

Spurious ;

> +}
> +
>  /**
>   * ODP packet example main function
>   */
> @@ -796,6 +847,8 @@ int main(int argc, char *argv[])
>   }
>   }
>
> + print_global_stats(num_workers);
> +
>   /* Master thread waits for other threads to exit */
>   odph_linux_pthread_join(thread_tbl, num_workers);
>
> --
> 1.9.1
>
Balakrishna Garapati Aug. 20, 2015, 11:39 a.m. UTC | #6
>
>
> > +static void print_global_stats(int num_workers)
> > +{
> > + uint64_t start, now, diff;
> > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
> > + int verbose_interval = 20, worker_count;
> > + odp_thrmask_t thrd_mask;
> > +
> > + start = odp_time_cycles();
> > + while (1) {
> > + now = odp_time_cycles();
> > + diff = odp_time_diff_cycles(start, now);
> > + if (odp_time_cycles_to_ns(diff) <
> > +    verbose_interval * ODP_TIME_SEC) {
> > + continue;
> > + }
>
> There's no exit condition check in this loop above, so you'll wait up to
> verbose_interval extra seconds unnecessarily, for example when running
> with "-n 100" the workers will finish quickly.
>
> +
> > + start = odp_time_cycles();
> > +
> > + worker_count = odp_thrmask_worker(&thrd_mask);
>
> There's a potential race here as the worker thread counts get
> incremented after the thread has started (rather than during the
> odph_linux_pthread_create() call), and it's pretty likely this code
> will be reached by the main thread before the workers have started.
>
> Should wait until worker count has reached num_workers before entering
> the loop.
>


 verbose_interval * ODP_TIME_SEC internally a sleep time before reaching
this check. Even if we introduce the worker_count == num_workers, we might
have to do a default sleep anyway to make sure we have workers up and
running otherwise we break the loop. do we still need a separate check for
this ?



> > + if (worker_count < num_workers)
> > + break;
>
> --
> Stuart.
>
Stuart Haslam Aug. 20, 2015, 12:12 p.m. UTC | #7
On Thu, Aug 20, 2015 at 01:39:29PM +0200, Krishna Garapati wrote:
> >
> >
> > > +static void print_global_stats(int num_workers)
> > > +{
> > > + uint64_t start, now, diff;
> > > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
> > > + int verbose_interval = 20, worker_count;
> > > + odp_thrmask_t thrd_mask;
> > > +
> > > + start = odp_time_cycles();
> > > + while (1) {
> > > + now = odp_time_cycles();
> > > + diff = odp_time_diff_cycles(start, now);
> > > + if (odp_time_cycles_to_ns(diff) <
> > > +    verbose_interval * ODP_TIME_SEC) {
> > > + continue;
> > > + }
> >
> > There's no exit condition check in this loop above, so you'll wait up to
> > verbose_interval extra seconds unnecessarily, for example when running
> > with "-n 100" the workers will finish quickly.
> >
> > +
> > > + start = odp_time_cycles();
> > > +
> > > + worker_count = odp_thrmask_worker(&thrd_mask);
> >
> > There's a potential race here as the worker thread counts get
> > incremented after the thread has started (rather than during the
> > odph_linux_pthread_create() call), and it's pretty likely this code
> > will be reached by the main thread before the workers have started.
> >
> > Should wait until worker count has reached num_workers before entering
> > the loop.
> >
> 
> 
>  verbose_interval * ODP_TIME_SEC internally a sleep time before reaching
> this check. Even if we introduce the worker_count == num_workers, we might
> have to do a default sleep anyway to make sure we have workers up and
> running otherwise we break the loop. do we still need a separate check for
> this ?
> 

Sleeps/delays are generally a bad way to resolve race conditions, since
the race is still there but you're just avoiding it. If at some point in
the future we were to make verbose_interval configurable then it may be
exposed again. It's better to have an explicit check that the precondition
(workers started) has been met. Also as I said in the previous comment
the existing 20 second delay before entering the rest of the loop causes
other problems.

I was thinking of something like;

while (odp_thrmask_workers(&thr_mask) < num_workers) { }

while (odp_thrmask_workers(&thr_mask) == num_workers)
{
	..
}
diff mbox

Patch

diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c
index bdee222..5d1c54a 100644
--- a/example/generator/odp_generator.c
+++ b/example/generator/odp_generator.c
@@ -101,6 +101,7 @@  static void usage(char *progname);
 static int scan_ip(char *buf, unsigned int *paddr);
 static int scan_mac(char *in, odph_ethaddr_t *des);
 static void tv_sub(struct timeval *recvtime, struct timeval *sendtime);
+static void print_global_stats(int num_workers);
 
 /**
  * Sleep for the specified amount of milliseconds
@@ -371,7 +372,6 @@  static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool)
 static void *gen_send_thread(void *arg)
 {
 	int thr;
-	uint64_t start, now, diff;
 	odp_pktio_t pktio;
 	thread_args_t *thr_args;
 	odp_queue_t outq_def;
@@ -393,7 +393,6 @@  static void *gen_send_thread(void *arg)
 		return NULL;
 	}
 
-	start = odp_time_cycles();
 	printf("  [%02i] created mode: SEND\n", thr);
 	for (;;) {
 		int err;
@@ -434,15 +433,6 @@  static void *gen_send_thread(void *arg)
 		    >= (unsigned int)args->appl.number) {
 			break;
 		}
-
-		now = odp_time_cycles();
-		diff = odp_time_diff_cycles(start, now);
-		if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) {
-			start = odp_time_cycles();
-			printf("  [%02i] total send: %ju\n",
-			       thr, odp_atomic_load_u64(&counters.seq));
-			fflush(stdout);
-		}
 	}
 
 	/* receive number of reply pks until timeout */
@@ -502,16 +492,16 @@  static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len)
 			continue;
 
 		odp_atomic_inc_u64(&counters.ip);
-		rlen += sprintf(msg, "receive Packet proto:IP ");
 		buf = odp_packet_data(pkt);
 		ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt));
-		rlen += sprintf(msg + rlen, "id %d ",
-				odp_be_to_cpu_16(ip->id));
 		offset = odp_packet_l4_offset(pkt);
 
 		/* udp */
 		if (ip->proto == ODPH_IPPROTO_UDP) {
 			odp_atomic_inc_u64(&counters.udp);
+			rlen += sprintf(msg, "receive Packet proto:IP ");
+			rlen += sprintf(msg + rlen, "id %d ",
+					odp_be_to_cpu_16(ip->id));
 			udp = (odph_udphdr_t *)(buf + offset);
 			rlen += sprintf(msg + rlen, "UDP payload %d ",
 					odp_be_to_cpu_16(udp->length) -
@@ -589,6 +579,67 @@  static void *gen_recv_thread(void *arg)
 
 	return arg;
 }
+
+/**
+ * printing verbose statistics
+ *
+ */
+static void print_global_stats(int num_workers)
+{
+	uint64_t start, now, diff;
+	uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0;
+	int verbose_interval = 20, worker_count;
+	odp_thrmask_t thrd_mask;
+
+	start = odp_time_cycles();
+	while (1) {
+		now = odp_time_cycles();
+		diff = odp_time_diff_cycles(start, now);
+		if (odp_time_cycles_to_ns(diff) <
+		    verbose_interval * ODP_TIME_SEC) {
+			continue;
+		}
+
+		start = odp_time_cycles();
+
+		worker_count = odp_thrmask_worker(&thrd_mask);
+		if (worker_count < num_workers)
+			break;
+		if (args->appl.mode == APPL_MODE_PING) {
+			if (worker_count == num_workers)
+				break;
+		}
+
+		if (args->appl.mode == APPL_MODE_RCV) {
+			pkts = odp_atomic_load_u64(&counters.udp);
+			if (pkts != 0)
+				printf(" total receive(UDP: %" PRIu64 ")\n",
+				       pkts);
+			continue;
+		}
+
+		if (args->appl.mode == APPL_MODE_PING) {
+			pkts = odp_atomic_load_u64(&counters.icmp);
+			if (pkts != 0)
+				printf(" total receive(ICMP: %" PRIu64 ")\n",
+				       pkts);
+		}
+
+		pkts = odp_atomic_load_u64(&counters.seq);
+		printf(" total sent: %" PRIu64 "\n", pkts);
+
+	if (args->appl.mode == APPL_MODE_UDP) {
+			pps = (pkts - pkts_prev) / verbose_interval;
+			if (pps > maximum_pps)
+				maximum_pps = pps;
+			printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n",
+			       pps, maximum_pps);
+		}
+
+		pkts_prev = pkts;
+	};
+}
+
 /**
  * ODP packet example main function
  */
@@ -796,6 +847,8 @@  int main(int argc, char *argv[])
 		}
 	}
 
+	print_global_stats(num_workers);
+
 	/* Master thread waits for other threads to exit */
 	odph_linux_pthread_join(thread_tbl, num_workers);