diff mbox

[API-NEXT,PATCHv3] linux-generic: crypto: add openssl locking support for thread safety

Message ID 20170109152402.8222-1-bill.fischofer@linaro.org
State Accepted
Commit 46e088c2f56d1463a598b4ac23308e19bf2b9031
Headers show

Commit Message

Bill Fischofer Jan. 9, 2017, 3:24 p.m. UTC
Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding
OpenSSL callbacks for locking that use ticketlocks to provide
thread-safety for OpenSSL calls made by ODP components such as random
number generation.

Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

---
Changes for v3:
- Move code from odp_init.c to odp_crypto.c as suggested by Petri
  and Christophe

Changes for v2:
- OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

 platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

-- 
2.9.3

Comments

Christophe Milard Jan. 11, 2017, 9:35 a.m. UTC | #1
On 2017-01-09 09:24, Bill Fischofer wrote:
> Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding

> OpenSSL callbacks for locking that use ticketlocks to provide

> thread-safety for OpenSSL calls made by ODP components such as random

> number generation.

> 

> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

> ---


I remember someone asking why this was sent agains API-next rather than
master,... I cannot remember any reason why...
Should this be a master patch?

> Changes for v3:

> - Move code from odp_init.c to odp_crypto.c as suggested by Petri

>   and Christophe

> 

> Changes for v2:

> - OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

> 

>  platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++

>  1 file changed, 35 insertions(+)

> 

> diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c

> index 5808d16..4f17fd6 100644

> --- a/platform/linux-generic/odp_crypto.c

> +++ b/platform/linux-generic/odp_crypto.c

> @@ -64,6 +64,7 @@ typedef struct odp_crypto_global_s odp_crypto_global_t;

>  

>  struct odp_crypto_global_s {

>  	odp_spinlock_t                lock;

> +	odp_ticketlock_t **openssl_lock;

>  	odp_crypto_generic_session_t *free;

>  	odp_crypto_generic_session_t  sessions[0];

>  };

> @@ -948,16 +949,35 @@ odp_crypto_operation(odp_crypto_op_param_t *param,

>  	return 0;

>  }

>  

> +static unsigned long openssl_thread_id(void)

> +{

> +	return (unsigned long)odp_thread_id();

> +}


Question here (and sorry, I should have picked that in my first review):
1) The man page I found for CRYPTO_set_id_callback() has a different
prototype:
int CRYPTO_THREADID_set_callback(void (*threadid_func)(CRYPTO_THREADID *));
 for the callback function and says:
"The implementation of this callback should not fill in id directly,
but should use CRYPTO_THREADID_set_numeric() if thread IDs are numeric"
It seems your  callback function can return directely the thread_id...

2)odp_thread_id() will return different "thread_id" for different linux
processes (when ODP threads are linux processes). Not sure how open-ssl will
react to that (e.g. a single threaded linux process using open-ssl may
return thread-id x, where x can be any value, 0, or more).
Probably it is safe as the address or errno (see below) will probably exibit
a similar behaviour...
The man page I found also state:
" If the application does not register such a callback using
CRYPTO_THREADID_set_callback(), then a default implementation is used - on
Windows and BeOS this uses the system's default thread identifying APIs,
and on all other platforms it uses the address of errno. The latter is
satisfactory for thread-safety if and only if the platform has a thread-local
error number facility."
Do we have any linux version where errno is not thread local?
If not, considering both point 1) (different prototypes with different version)
and 2) (unclear behaviour on thread_id),
isn't is safer and clearer to skip the above function and its
callback registration, and let the default behaviour be used?

I honestely think what you wrote would work, but if the default does the job
with less code, shoudn't we just go for it?

I have started testing this patch anyway :-)

Christophe

> +

> +static void openssl_lock(int mode, int n,

> +			 const char *file ODP_UNUSED,

> +			 int line ODP_UNUSED)

> +{

> +	if (mode & CRYPTO_LOCK)

> +		odp_ticketlock_lock((odp_ticketlock_t *)

> +				    &global->openssl_lock[n]);

> +	else

> +		odp_ticketlock_unlock((odp_ticketlock_t *)

> +				      &global->openssl_lock[n]);

> +}

> +

>  int

>  odp_crypto_init_global(void)

>  {

>  	size_t mem_size;

>  	odp_shm_t shm;

>  	int idx;

> +	int nlocks = CRYPTO_num_locks();

>  

>  	/* Calculate the memory size we need */

>  	mem_size  = sizeof(*global);

>  	mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));

> +	mem_size += nlocks * sizeof(odp_ticketlock_t);

>  

>  	/* Allocate our globally shared memory */

>  	shm = odp_shm_reserve("crypto_pool", mem_size,

> @@ -975,6 +995,18 @@ odp_crypto_init_global(void)

>  	}

>  	odp_spinlock_init(&global->lock);

>  

> +	if (nlocks > 0) {

> +		global->openssl_lock =

> +			(odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];

> +

> +		for (idx = 0; idx < nlocks; idx++)

> +			odp_ticketlock_init((odp_ticketlock_t *)

> +					    &global->openssl_lock[idx]);

> +

> +		CRYPTO_set_id_callback(openssl_thread_id);

> +		CRYPTO_set_locking_callback(openssl_lock);

> +	}

> +

>  	return 0;

>  }

>  

> @@ -992,6 +1024,9 @@ int odp_crypto_term_global(void)

>  		rc = -1;

>  	}

>  

> +	CRYPTO_set_locking_callback(NULL);

> +	CRYPTO_set_id_callback(NULL);

> +

>  	ret = odp_shm_free(odp_shm_lookup("crypto_pool"));

>  	if (ret < 0) {

>  		ODP_ERR("shm free failed for crypto_pool\n");

> -- 

> 2.9.3

>
Bill Fischofer Jan. 11, 2017, 12:25 p.m. UTC | #2
On Wed, Jan 11, 2017 at 3:35 AM, Christophe Milard
<christophe.milard@linaro.org> wrote:
> On 2017-01-09 09:24, Bill Fischofer wrote:

>> Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding

>> OpenSSL callbacks for locking that use ticketlocks to provide

>> thread-safety for OpenSSL calls made by ODP components such as random

>> number generation.

>>

>> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

>> ---

>

> I remember someone asking why this was sent agains API-next rather than

> master,... I cannot remember any reason why...

> Should this be a master patch?

>


It's against api-next because the code that exposes the issue (the
random changes) and your shm tests are in api-next. I figured it's
easier to test there and then merge into master since we don't have a
way to verify the bug fix in master directly.

>> Changes for v3:

>> - Move code from odp_init.c to odp_crypto.c as suggested by Petri

>>   and Christophe

>>

>> Changes for v2:

>> - OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

>>

>>  platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++

>>  1 file changed, 35 insertions(+)

>>

>> diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c

>> index 5808d16..4f17fd6 100644

>> --- a/platform/linux-generic/odp_crypto.c

>> +++ b/platform/linux-generic/odp_crypto.c

>> @@ -64,6 +64,7 @@ typedef struct odp_crypto_global_s odp_crypto_global_t;

>>

>>  struct odp_crypto_global_s {

>>       odp_spinlock_t                lock;

>> +     odp_ticketlock_t **openssl_lock;

>>       odp_crypto_generic_session_t *free;

>>       odp_crypto_generic_session_t  sessions[0];

>>  };

>> @@ -948,16 +949,35 @@ odp_crypto_operation(odp_crypto_op_param_t *param,

>>       return 0;

>>  }

>>

>> +static unsigned long openssl_thread_id(void)

>> +{

>> +     return (unsigned long)odp_thread_id();

>> +}

>

> Question here (and sorry, I should have picked that in my first review):

> 1) The man page I found for CRYPTO_set_id_callback() has a different

> prototype:

> int CRYPTO_THREADID_set_callback(void (*threadid_func)(CRYPTO_THREADID *));

>  for the callback function and says:

> "The implementation of this callback should not fill in id directly,

> but should use CRYPTO_THREADID_set_numeric() if thread IDs are numeric"

> It seems your  callback function can return directely the thread_id...


Yes, the documentation is not the best on these. I found a few
examples on the net and also examined the OpenSSL source code. This
code seems to work and since this is temporary for older versions of
OpenSSL seems simplest.

>

> 2)odp_thread_id() will return different "thread_id" for different linux

> processes (when ODP threads are linux processes). Not sure how open-ssl will

> react to that (e.g. a single threaded linux process using open-ssl may

> return thread-id x, where x can be any value, 0, or more).


All OpenSSL requires is a unique thread id and that's what
odp_thread_id() provides.

> Probably it is safe as the address or errno (see below) will probably exibit

> a similar behaviour...

> The man page I found also state:

> " If the application does not register such a callback using

> CRYPTO_THREADID_set_callback(), then a default implementation is used - on

> Windows and BeOS this uses the system's default thread identifying APIs,

> and on all other platforms it uses the address of errno. The latter is

> satisfactory for thread-safety if and only if the platform has a thread-local

> error number facility."


Not applicable as we're supplying a unique thread id.

> Do we have any linux version where errno is not thread local?

> If not, considering both point 1) (different prototypes with different version)

> and 2) (unclear behaviour on thread_id),

> isn't is safer and clearer to skip the above function and its

> callback registration, and let the default behaviour be used?

>

> I honestely think what you wrote would work, but if the default does the job

> with less code, shoudn't we just go for it?


No, again this is a temporary fix for a bug that has already been
fixed in OpenSSL 1.1.0 and above since the library itself is thread
safe. All of these APIs are already deprecated (they are no-ops in
OpenSSL 1.1.0) so these considerations don't apply. We're just
ensuring that ODP behaves in a thread-safe manner in the OpenSSL 1.0.2
series which is still distributed in Ubuntu (and perhaps other
distros). These were the original APIs and work across the widest
range of older OpenSSL releases. The various tweaks were stopgaps
until they finally did the right thing and made OpenSSL itself thread
safe.

>

> I have started testing this patch anyway :-)

>

> Christophe

>

>> +

>> +static void openssl_lock(int mode, int n,

>> +                      const char *file ODP_UNUSED,

>> +                      int line ODP_UNUSED)

>> +{

>> +     if (mode & CRYPTO_LOCK)

>> +             odp_ticketlock_lock((odp_ticketlock_t *)

>> +                                 &global->openssl_lock[n]);

>> +     else

>> +             odp_ticketlock_unlock((odp_ticketlock_t *)

>> +                                   &global->openssl_lock[n]);

>> +}

>> +

>>  int

>>  odp_crypto_init_global(void)

>>  {

>>       size_t mem_size;

>>       odp_shm_t shm;

>>       int idx;

>> +     int nlocks = CRYPTO_num_locks();

>>

>>       /* Calculate the memory size we need */

>>       mem_size  = sizeof(*global);

>>       mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));

>> +     mem_size += nlocks * sizeof(odp_ticketlock_t);

>>

>>       /* Allocate our globally shared memory */

>>       shm = odp_shm_reserve("crypto_pool", mem_size,

>> @@ -975,6 +995,18 @@ odp_crypto_init_global(void)

>>       }

>>       odp_spinlock_init(&global->lock);

>>

>> +     if (nlocks > 0) {

>> +             global->openssl_lock =

>> +                     (odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];

>> +

>> +             for (idx = 0; idx < nlocks; idx++)

>> +                     odp_ticketlock_init((odp_ticketlock_t *)

>> +                                         &global->openssl_lock[idx]);

>> +

>> +             CRYPTO_set_id_callback(openssl_thread_id);

>> +             CRYPTO_set_locking_callback(openssl_lock);

>> +     }

>> +

>>       return 0;

>>  }

>>

>> @@ -992,6 +1024,9 @@ int odp_crypto_term_global(void)

>>               rc = -1;

>>       }

>>

>> +     CRYPTO_set_locking_callback(NULL);

>> +     CRYPTO_set_id_callback(NULL);

>> +

>>       ret = odp_shm_free(odp_shm_lookup("crypto_pool"));

>>       if (ret < 0) {

>>               ODP_ERR("shm free failed for crypto_pool\n");

>> --

>> 2.9.3

>>
Christophe Milard Jan. 12, 2017, 7:48 a.m. UTC | #3
Hi Bill,

On 11 January 2017 at 13:25, Bill Fischofer <bill.fischofer@linaro.org> wrote:
> On Wed, Jan 11, 2017 at 3:35 AM, Christophe Milard

> <christophe.milard@linaro.org> wrote:

>> On 2017-01-09 09:24, Bill Fischofer wrote:

>>> Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding

>>> OpenSSL callbacks for locking that use ticketlocks to provide

>>> thread-safety for OpenSSL calls made by ODP components such as random

>>> number generation.

>>>

>>> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

>>> ---

>>

>> I remember someone asking why this was sent agains API-next rather than

>> master,... I cannot remember any reason why...

>> Should this be a master patch?

>>

>

> It's against api-next because the code that exposes the issue (the

> random changes) and your shm tests are in api-next. I figured it's

> easier to test there and then merge into master since we don't have a

> way to verify the bug fix in master directly.


ok. I am fine with that.

>

>>> Changes for v3:

>>> - Move code from odp_init.c to odp_crypto.c as suggested by Petri

>>>   and Christophe

>>>

>>> Changes for v2:

>>> - OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

>>>

>>>  platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++

>>>  1 file changed, 35 insertions(+)

>>>

>>> diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c

>>> index 5808d16..4f17fd6 100644

>>> --- a/platform/linux-generic/odp_crypto.c

>>> +++ b/platform/linux-generic/odp_crypto.c

>>> @@ -64,6 +64,7 @@ typedef struct odp_crypto_global_s odp_crypto_global_t;

>>>

>>>  struct odp_crypto_global_s {

>>>       odp_spinlock_t                lock;

>>> +     odp_ticketlock_t **openssl_lock;

>>>       odp_crypto_generic_session_t *free;

>>>       odp_crypto_generic_session_t  sessions[0];

>>>  };

>>> @@ -948,16 +949,35 @@ odp_crypto_operation(odp_crypto_op_param_t *param,

>>>       return 0;

>>>  }

>>>

>>> +static unsigned long openssl_thread_id(void)

>>> +{

>>> +     return (unsigned long)odp_thread_id();

>>> +}

>>

>> Question here (and sorry, I should have picked that in my first review):

>> 1) The man page I found for CRYPTO_set_id_callback() has a different

>> prototype:

>> int CRYPTO_THREADID_set_callback(void (*threadid_func)(CRYPTO_THREADID *));

>>  for the callback function and says:

>> "The implementation of this callback should not fill in id directly,

>> but should use CRYPTO_THREADID_set_numeric() if thread IDs are numeric"

>> It seems your  callback function can return directely the thread_id...

>

> Yes, the documentation is not the best on these. I found a few

> examples on the net and also examined the OpenSSL source code. This

> code seems to work and since this is temporary for older versions of

> OpenSSL seems simplest.

>

>>

>> 2)odp_thread_id() will return different "thread_id" for different linux

>> processes (when ODP threads are linux processes). Not sure how open-ssl will

>> react to that (e.g. a single threaded linux process using open-ssl may

>> return thread-id x, where x can be any value, 0, or more).

>

> All OpenSSL requires is a unique thread id and that's what

> odp_thread_id() provides.

>

>> Probably it is safe as the address or errno (see below) will probably exibit

>> a similar behaviour...

>> The man page I found also state:

>> " If the application does not register such a callback using

>> CRYPTO_THREADID_set_callback(), then a default implementation is used - on

>> Windows and BeOS this uses the system's default thread identifying APIs,

>> and on all other platforms it uses the address of errno. The latter is

>> satisfactory for thread-safety if and only if the platform has a thread-local

>> error number facility."

>

> Not applicable as we're supplying a unique thread id.


Confused here: "Not applicable as we're supplying a unique thread
id.". But may understanding is that if we do not supply the unique
thread ID, openssl will do it by istelf, using &errno.
What is wrong with that?

>

>> Do we have any linux version where errno is not thread local?

>> If not, considering both point 1) (different prototypes with different version)

>> and 2) (unclear behaviour on thread_id),

>> isn't is safer and clearer to skip the above function and its

>> callback registration, and let the default behaviour be used?

>>

>> I honestely think what you wrote would work, but if the default does the job

>> with less code, shoudn't we just go for it?

>

> No, again this is a temporary fix for a bug that has already been

> fixed in OpenSSL 1.1.0 and above since the library itself is thread

> safe. All of these APIs are already deprecated (they are no-ops in

> OpenSSL 1.1.0) so these considerations don't apply. We're just

> ensuring that ODP behaves in a thread-safe manner in the OpenSSL 1.0.2

> series which is still distributed in Ubuntu (and perhaps other

> distros). These were the original APIs and work across the widest

> range of older OpenSSL releases. The various tweaks were stopgaps

> until they finally did the right thing and made OpenSSL itself thread

> safe.

>

>>

>> I have started testing this patch anyway :-)

>>


Bad news: Still getting similar fault with your patch...
At this point, I think we should write a mutlithreaded radom test
without any ishm involvment at all, just to make sure we really hit
the open-ssl threadsafe issue. My question (is random/openssl
odpthreadsafe) was obviously interresting, but when seeing this crash
with your patch applyied, I am wondering whether this is really what
we hit.

git logo:
d316e29  - Bill Fischofer      : linux-generic: crypto: add openssl
locking support for thread safety
29b05df  - Christophe Milard   : linux-gen: _ishm: fixing typos
54c15b2  - Bill Fischofer      : api: pktio: pktio documentation typo correction
c237528  - Christophe Milard   : test: shm: checking exported vs
imported block length
82fe6ad  - Christophe Milard   : linux-gen: _ishm: exporting/importing
user len and flags

segfault:
Thread 14 "drvshmem_main" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffc3d19700 (LWP 50869)]
0x00007ffff73e59fe in ?? () from /lib/x86_64-linux-gnu/libcrypto.so.1.0.0
(gdb) bt
#0  0x00007ffff73e59fe in ?? () from /lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#1  0xca62c1d6ca62c1d6 in ?? ()
#2  0xca62c1d6ca62c1d6 in ?? ()
#3  0xca62c1d6ca62c1d6 in ?? ()
#4  0xca62c1d6ca62c1d6 in ?? ()
#5  0xca62c1d6ca62c1d6 in ?? ()
#6  0xca62c1d6ca62c1d6 in ?? ()
#7  0xca62c1d6ca62c1d6 in ?? ()
#8  0xca62c1d6ca62c1d6 in ?? ()
#9  0x00000000000003c5 in ?? ()
#10 0x0000000000000068 in ?? ()
#11 0x00007ffff754a6b0 in ?? () from /lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#12 0x00007ffff73dae78 in CRYPTO_malloc () from
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#13 0x00007ffff73e29c8 in SHA1_Update () from
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#14 0x00007ffff74931c0 in ?? () from /lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#15 0x00007ffff74935b5 in ?? () from /lib/x86_64-linux-gnu/libcrypto.so.1.0.0
#16 0x000000000040e428 in odp_random_data (buf=0x7fffc3d06ba3 "",
len=5, kind=ODP_RANDOM_BASIC) at odp_crypto.c:1015
#17 0x00000000004072f7 in run_test_stress (arg=0x7fffffffdc58) at drvshmem.c:605
#18 0x000000000040ba9e in odpthread_run_start_routine (arg=0x66b8d0
<thread_tbl>) at linux.c:275
#19 0x00007ffff6d146ba in start_thread (arg=0x7fffc3d19700) at
pthread_create.c:333
#20 0x00007ffff6a4a82d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:109

>> Christophe

>>

>>> +

>>> +static void openssl_lock(int mode, int n,

>>> +                      const char *file ODP_UNUSED,

>>> +                      int line ODP_UNUSED)

>>> +{

>>> +     if (mode & CRYPTO_LOCK)

>>> +             odp_ticketlock_lock((odp_ticketlock_t *)

>>> +                                 &global->openssl_lock[n]);

>>> +     else

>>> +             odp_ticketlock_unlock((odp_ticketlock_t *)

>>> +                                   &global->openssl_lock[n]);

>>> +}

>>> +

>>>  int

>>>  odp_crypto_init_global(void)

>>>  {

>>>       size_t mem_size;

>>>       odp_shm_t shm;

>>>       int idx;

>>> +     int nlocks = CRYPTO_num_locks();

>>>

>>>       /* Calculate the memory size we need */

>>>       mem_size  = sizeof(*global);

>>>       mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));

>>> +     mem_size += nlocks * sizeof(odp_ticketlock_t);

>>>

>>>       /* Allocate our globally shared memory */

>>>       shm = odp_shm_reserve("crypto_pool", mem_size,

>>> @@ -975,6 +995,18 @@ odp_crypto_init_global(void)

>>>       }

>>>       odp_spinlock_init(&global->lock);

>>>

>>> +     if (nlocks > 0) {

>>> +             global->openssl_lock =

>>> +                     (odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];

>>> +

>>> +             for (idx = 0; idx < nlocks; idx++)

>>> +                     odp_ticketlock_init((odp_ticketlock_t *)

>>> +                                         &global->openssl_lock[idx]);

>>> +

>>> +             CRYPTO_set_id_callback(openssl_thread_id);

>>> +             CRYPTO_set_locking_callback(openssl_lock);

>>> +     }

>>> +

>>>       return 0;

>>>  }

>>>

>>> @@ -992,6 +1024,9 @@ int odp_crypto_term_global(void)

>>>               rc = -1;

>>>       }

>>>

>>> +     CRYPTO_set_locking_callback(NULL);

>>> +     CRYPTO_set_id_callback(NULL);

>>> +

>>>       ret = odp_shm_free(odp_shm_lookup("crypto_pool"));

>>>       if (ret < 0) {

>>>               ODP_ERR("shm free failed for crypto_pool\n");

>>> --

>>> 2.9.3

>>>
Christophe Milard Jan. 13, 2017, 12:50 p.m. UTC | #4
On 9 January 2017 at 16:24, Bill Fischofer <bill.fischofer@linaro.org> wrote:
> Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding

> OpenSSL callbacks for locking that use ticketlocks to provide

> thread-safety for OpenSSL calls made by ODP components such as random

> number generation.

>

> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>


Reviewed-by: Christophe Milard <christophe.milard@linaro.org>

> ---

Note that this has not proven to be working as the error root is very
unsure and hard to reproduce. But I believe it is better to have an
unknown correction rather than a known bug :-)

> Changes for v3:

> - Move code from odp_init.c to odp_crypto.c as suggested by Petri

>   and Christophe

>

> Changes for v2:

> - OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

>

>  platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++

>  1 file changed, 35 insertions(+)

>

> diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c

> index 5808d16..4f17fd6 100644

> --- a/platform/linux-generic/odp_crypto.c

> +++ b/platform/linux-generic/odp_crypto.c

> @@ -64,6 +64,7 @@ typedef struct odp_crypto_global_s odp_crypto_global_t;

>

>  struct odp_crypto_global_s {

>         odp_spinlock_t                lock;

> +       odp_ticketlock_t **openssl_lock;

>         odp_crypto_generic_session_t *free;

>         odp_crypto_generic_session_t  sessions[0];

>  };

> @@ -948,16 +949,35 @@ odp_crypto_operation(odp_crypto_op_param_t *param,

>         return 0;

>  }

>

> +static unsigned long openssl_thread_id(void)

> +{

> +       return (unsigned long)odp_thread_id();

> +}

> +

> +static void openssl_lock(int mode, int n,

> +                        const char *file ODP_UNUSED,

> +                        int line ODP_UNUSED)

> +{

> +       if (mode & CRYPTO_LOCK)

> +               odp_ticketlock_lock((odp_ticketlock_t *)

> +                                   &global->openssl_lock[n]);

> +       else

> +               odp_ticketlock_unlock((odp_ticketlock_t *)

> +                                     &global->openssl_lock[n]);

> +}

> +

>  int

>  odp_crypto_init_global(void)

>  {

>         size_t mem_size;

>         odp_shm_t shm;

>         int idx;

> +       int nlocks = CRYPTO_num_locks();

>

>         /* Calculate the memory size we need */

>         mem_size  = sizeof(*global);

>         mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));

> +       mem_size += nlocks * sizeof(odp_ticketlock_t);

>

>         /* Allocate our globally shared memory */

>         shm = odp_shm_reserve("crypto_pool", mem_size,

> @@ -975,6 +995,18 @@ odp_crypto_init_global(void)

>         }

>         odp_spinlock_init(&global->lock);

>

> +       if (nlocks > 0) {

> +               global->openssl_lock =

> +                       (odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];

> +

> +               for (idx = 0; idx < nlocks; idx++)

> +                       odp_ticketlock_init((odp_ticketlock_t *)

> +                                           &global->openssl_lock[idx]);

> +

> +               CRYPTO_set_id_callback(openssl_thread_id);

> +               CRYPTO_set_locking_callback(openssl_lock);

> +       }

> +

>         return 0;

>  }

>

> @@ -992,6 +1024,9 @@ int odp_crypto_term_global(void)

>                 rc = -1;

>         }

>

> +       CRYPTO_set_locking_callback(NULL);

> +       CRYPTO_set_id_callback(NULL);

> +

>         ret = odp_shm_free(odp_shm_lookup("crypto_pool"));

>         if (ret < 0) {

>                 ODP_ERR("shm free failed for crypto_pool\n");

> --

> 2.9.3

>
Maxim Uvarov Jan. 13, 2017, 12:57 p.m. UTC | #5
On 01/13/17 15:50, Christophe Milard wrote:
> On 9 January 2017 at 16:24, Bill Fischofer <bill.fischofer@linaro.org> wrote:

>> Resolve Bug https://bugs.linaro.org/show_bug.cgi?id=2798 by adding

>> OpenSSL callbacks for locking that use ticketlocks to provide

>> thread-safety for OpenSSL calls made by ODP components such as random

>> number generation.

>>

>> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

> 

> Reviewed-by: Christophe Milard <christophe.milard@linaro.org>


Yes, I run more then 20 hours and test works, while with patch I can
reproduce it after 5 minutes.

Merged.

Maxim.


>> ---

> Note that this has not proven to be working as the error root is very

> unsure and hard to reproduce. But I believe it is better to have an

> unknown correction rather than a known bug :-)

> 

>> Changes for v3:

>> - Move code from odp_init.c to odp_crypto.c as suggested by Petri

>>   and Christophe

>>

>> Changes for v2:

>> - OPENSSL_INIT stage should precede CRYPTO_INIT stage since crypto uses OpenSSL

>>

>>  platform/linux-generic/odp_crypto.c | 35 +++++++++++++++++++++++++++++++++++

>>  1 file changed, 35 insertions(+)

>>

>> diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c

>> index 5808d16..4f17fd6 100644

>> --- a/platform/linux-generic/odp_crypto.c

>> +++ b/platform/linux-generic/odp_crypto.c

>> @@ -64,6 +64,7 @@ typedef struct odp_crypto_global_s odp_crypto_global_t;

>>

>>  struct odp_crypto_global_s {

>>         odp_spinlock_t                lock;

>> +       odp_ticketlock_t **openssl_lock;

>>         odp_crypto_generic_session_t *free;

>>         odp_crypto_generic_session_t  sessions[0];

>>  };

>> @@ -948,16 +949,35 @@ odp_crypto_operation(odp_crypto_op_param_t *param,

>>         return 0;

>>  }

>>

>> +static unsigned long openssl_thread_id(void)

>> +{

>> +       return (unsigned long)odp_thread_id();

>> +}

>> +

>> +static void openssl_lock(int mode, int n,

>> +                        const char *file ODP_UNUSED,

>> +                        int line ODP_UNUSED)

>> +{

>> +       if (mode & CRYPTO_LOCK)

>> +               odp_ticketlock_lock((odp_ticketlock_t *)

>> +                                   &global->openssl_lock[n]);

>> +       else

>> +               odp_ticketlock_unlock((odp_ticketlock_t *)

>> +                                     &global->openssl_lock[n]);

>> +}

>> +

>>  int

>>  odp_crypto_init_global(void)

>>  {

>>         size_t mem_size;

>>         odp_shm_t shm;

>>         int idx;

>> +       int nlocks = CRYPTO_num_locks();

>>

>>         /* Calculate the memory size we need */

>>         mem_size  = sizeof(*global);

>>         mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));

>> +       mem_size += nlocks * sizeof(odp_ticketlock_t);

>>

>>         /* Allocate our globally shared memory */

>>         shm = odp_shm_reserve("crypto_pool", mem_size,

>> @@ -975,6 +995,18 @@ odp_crypto_init_global(void)

>>         }

>>         odp_spinlock_init(&global->lock);

>>

>> +       if (nlocks > 0) {

>> +               global->openssl_lock =

>> +                       (odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];

>> +

>> +               for (idx = 0; idx < nlocks; idx++)

>> +                       odp_ticketlock_init((odp_ticketlock_t *)

>> +                                           &global->openssl_lock[idx]);

>> +

>> +               CRYPTO_set_id_callback(openssl_thread_id);

>> +               CRYPTO_set_locking_callback(openssl_lock);

>> +       }

>> +

>>         return 0;

>>  }

>>

>> @@ -992,6 +1024,9 @@ int odp_crypto_term_global(void)

>>                 rc = -1;

>>         }

>>

>> +       CRYPTO_set_locking_callback(NULL);

>> +       CRYPTO_set_id_callback(NULL);

>> +

>>         ret = odp_shm_free(odp_shm_lookup("crypto_pool"));

>>         if (ret < 0) {

>>                 ODP_ERR("shm free failed for crypto_pool\n");

>> --

>> 2.9.3

>>
diff mbox

Patch

diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c
index 5808d16..4f17fd6 100644
--- a/platform/linux-generic/odp_crypto.c
+++ b/platform/linux-generic/odp_crypto.c
@@ -64,6 +64,7 @@  typedef struct odp_crypto_global_s odp_crypto_global_t;
 
 struct odp_crypto_global_s {
 	odp_spinlock_t                lock;
+	odp_ticketlock_t **openssl_lock;
 	odp_crypto_generic_session_t *free;
 	odp_crypto_generic_session_t  sessions[0];
 };
@@ -948,16 +949,35 @@  odp_crypto_operation(odp_crypto_op_param_t *param,
 	return 0;
 }
 
+static unsigned long openssl_thread_id(void)
+{
+	return (unsigned long)odp_thread_id();
+}
+
+static void openssl_lock(int mode, int n,
+			 const char *file ODP_UNUSED,
+			 int line ODP_UNUSED)
+{
+	if (mode & CRYPTO_LOCK)
+		odp_ticketlock_lock((odp_ticketlock_t *)
+				    &global->openssl_lock[n]);
+	else
+		odp_ticketlock_unlock((odp_ticketlock_t *)
+				      &global->openssl_lock[n]);
+}
+
 int
 odp_crypto_init_global(void)
 {
 	size_t mem_size;
 	odp_shm_t shm;
 	int idx;
+	int nlocks = CRYPTO_num_locks();
 
 	/* Calculate the memory size we need */
 	mem_size  = sizeof(*global);
 	mem_size += (MAX_SESSIONS * sizeof(odp_crypto_generic_session_t));
+	mem_size += nlocks * sizeof(odp_ticketlock_t);
 
 	/* Allocate our globally shared memory */
 	shm = odp_shm_reserve("crypto_pool", mem_size,
@@ -975,6 +995,18 @@  odp_crypto_init_global(void)
 	}
 	odp_spinlock_init(&global->lock);
 
+	if (nlocks > 0) {
+		global->openssl_lock =
+			(odp_ticketlock_t **)&global->sessions[MAX_SESSIONS];
+
+		for (idx = 0; idx < nlocks; idx++)
+			odp_ticketlock_init((odp_ticketlock_t *)
+					    &global->openssl_lock[idx]);
+
+		CRYPTO_set_id_callback(openssl_thread_id);
+		CRYPTO_set_locking_callback(openssl_lock);
+	}
+
 	return 0;
 }
 
@@ -992,6 +1024,9 @@  int odp_crypto_term_global(void)
 		rc = -1;
 	}
 
+	CRYPTO_set_locking_callback(NULL);
+	CRYPTO_set_id_callback(NULL);
+
 	ret = odp_shm_free(odp_shm_lookup("crypto_pool"));
 	if (ret < 0) {
 		ODP_ERR("shm free failed for crypto_pool\n");