Message ID | 20221006145927.32731-6-isak.westin@loytec.com |
---|---|
State | New |
Headers | show |
Series | Mesh: Fixes for PTS issues | expand |
Hi Brian, > Hi Isak, > > Have you run this patch through any runtime analysis (like valgrind > etc) to ensure it has introduced no memory leaks? > > I am particularily concerned with any l_timeout_remove() calls that don't immediately set the freed timer pointer to NULL, and any new > l_timeout_create() calls that are not preceded with a check that there is not already a valid pointer occupying the seg_timeout or msg_timeout members. > I tested it with valgrind and found no memory leaks. However, it is perhaps anyway a better practice to use l_timeout_modify() ? If so, I will update the patch. > On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote: > > When a SAR transmission has been completed or canceled, the recipent > > should store the block authentication values for at least 10 seconds > > and ignore new segments with the same values during this period. See > > MshPRFv1.0.1 section 3.5.3.4. > > --- > > mesh/net.c | 30 ++++++++++++++++++++++++++++-- > > 1 file changed, 28 insertions(+), 2 deletions(-) > > > > diff --git a/mesh/net.c b/mesh/net.c > > index 379a6e250..e95ae5114 100644 > > --- a/mesh/net.c > > +++ b/mesh/net.c > > @@ -46,6 +46,7 @@ > > > > #define SEG_TO 2 > > #define MSG_TO 60 > > +#define SAR_DEL 10 > > > > #define DEFAULT_TRANSMIT_COUNT 1 > > #define DEFAULT_TRANSMIT_INTERVAL 100 > > @@ -166,6 +167,7 @@ struct mesh_sar { > > bool segmented; > > bool frnd; > > bool frnd_cred; > > + bool delete; > > uint8_t ttl; > > uint8_t last_seg; > > uint8_t key_aid; > > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout > > *seg_timeout, void *user_data) static void inmsg_to(struct l_timeout > > *msg_timeout, void *user_data) { > > struct mesh_net *net = user_data; > > - struct mesh_sar *sar = l_queue_remove_if(net->sar_in, > > + struct mesh_sar *sar = l_queue_find(net->sar_in, > > match_msg_timeout, msg_timeout); > > > > l_timeout_remove(msg_timeout); > > if (!sar) > > return; > > > > + if (!sar->delete) { > > + /* > > + * Incomplete timer expired, cancel SAR and start > > + * delete timer > > + */ > > + sar->delete = true; > > + l_timeout_remove(sar->seg_timeout); > > + sar->seg_timeout = NULL; > > + sar->msg_timeout = l_timeout_create(SAR_DEL, > > + inmsg_to, net, NULL); > > + return; > > + } > > + > > + l_queue_remove(net->sar_in, sar); > > sar->msg_timeout = NULL; > > mesh_sar_free(sar); > > } > > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, bool > > frnd, uint32_t iv_index, > > /* Re-Send ACK for full msg */ > > send_net_ack(net, sar_in, expected); > > return true; > > - } > > + } else if (sar_in->delete) > > + /* Ignore canceled */ > > + return false; > > } else { > > uint16_t len = MAX_SEG_TO_LEN(segN); > > > > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, bool > > frnd, uint32_t iv_index, > > sar_in->len = segN * MAX_SEG_LEN + size; > > > > if (sar_in->flags == expected) { > > + /* Remove message incomplete timer */ > > + l_timeout_remove(sar_in->msg_timeout); > > + > > /* Got it all */ > > send_net_ack(net, sar_in, expected); > > > > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net, bool > > frnd, uint32_t iv_index, > > /* Kill Inter-Seg timeout */ > > l_timeout_remove(sar_in->seg_timeout); > > sar_in->seg_timeout = NULL; > > + > > + /* Start delete timer */ > > + sar_in->delete = true; > > + sar_in->msg_timeout = l_timeout_create(SAR_DEL, > > + inmsg_to, net, NULL); > > return true; > > } > > > >
Hi Isak, On Fri, 2022-10-07 at 13:33 +0000, Isak Westin wrote: > Hi Brian, > > > Hi Isak, > > > > Have you run this patch through any runtime analysis (like valgrind > > etc) to ensure it has introduced no memory leaks? > > > > I am particularily concerned with any l_timeout_remove() calls that > > don't immediately set the freed timer pointer to NULL, and any new > > l_timeout_create() calls that are not preceded with a check that > > there is not already a valid pointer occupying the seg_timeout or > > msg_timeout members. > > > > I tested it with valgrind and found no memory leaks. However, it is > perhaps anyway a better practice to use l_timeout_modify() ? > If so, I will update the patch. What's important to remember about the l_timeout* functions is that they do not clean-up after themselves, you must remove them when you are done, and you need to be careful to not double-free. You can free and then create if you are more comfortable with that then l_timeout_modify, but follow the rule of thumb to set the pointers to NULL after you have freed the timer, and make sure you free the timers before creating a new one. And free the timers that have fired which you do not intend to restart. Many of the timers in the SAR system never trigger if the messages are flowing as they should, and so some potential memory leaks don't get caught by valgrind *unless* an "abnormal" timer fire occurs. We've had to address a lot of those. > > > On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote: > > > When a SAR transmission has been completed or canceled, the > > > recipent > > > should store the block authentication values for at least 10 > > > seconds > > > and ignore new segments with the same values during this period. > > > See > > > MshPRFv1.0.1 section 3.5.3.4. > > > --- > > > mesh/net.c | 30 ++++++++++++++++++++++++++++-- > > > 1 file changed, 28 insertions(+), 2 deletions(-) > > > > > > diff --git a/mesh/net.c b/mesh/net.c > > > index 379a6e250..e95ae5114 100644 > > > --- a/mesh/net.c > > > +++ b/mesh/net.c > > > @@ -46,6 +46,7 @@ > > > > > > #define SEG_TO 2 > > > #define MSG_TO 60 > > > +#define SAR_DEL 10 > > > > > > #define DEFAULT_TRANSMIT_COUNT 1 > > > #define DEFAULT_TRANSMIT_INTERVAL 100 > > > @@ -166,6 +167,7 @@ struct mesh_sar { > > > bool segmented; > > > bool frnd; > > > bool frnd_cred; > > > + bool delete; > > > uint8_t ttl; > > > uint8_t last_seg; > > > uint8_t key_aid; > > > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout > > > *seg_timeout, void *user_data) static void inmsg_to(struct > > > l_timeout > > > *msg_timeout, void *user_data) { > > > struct mesh_net *net = user_data; > > > - struct mesh_sar *sar = l_queue_remove_if(net->sar_in, > > > + struct mesh_sar *sar = l_queue_find(net->sar_in, > > > match_msg_timeout, msg_timeout); > > > > > > l_timeout_remove(msg_timeout); > > > if (!sar) > > > return; > > > > > > + if (!sar->delete) { > > > + /* > > > + * Incomplete timer expired, cancel SAR and start > > > + * delete timer > > > + */ > > > + sar->delete = true; > > > + l_timeout_remove(sar->seg_timeout); > > > + sar->seg_timeout = NULL; > > > + sar->msg_timeout = l_timeout_create(SAR_DEL, > > > + inmsg_to, net, NULL); > > > + return; > > > + } > > > + > > > + l_queue_remove(net->sar_in, sar); > > > sar->msg_timeout = NULL; > > > mesh_sar_free(sar); > > > } > > > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, > > > bool > > > frnd, uint32_t iv_index, > > > /* Re-Send ACK for full msg */ > > > send_net_ack(net, sar_in, expected); > > > return true; > > > - } > > > + } else if (sar_in->delete) > > > + /* Ignore canceled */ > > > + return false; > > > } else { > > > uint16_t len = MAX_SEG_TO_LEN(segN); > > > > > > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, > > > bool > > > frnd, uint32_t iv_index, > > > sar_in->len = segN * MAX_SEG_LEN + size; > > > > > > if (sar_in->flags == expected) { > > > + /* Remove message incomplete timer */ > > > + l_timeout_remove(sar_in->msg_timeout); > > > + > > > /* Got it all */ > > > send_net_ack(net, sar_in, expected); > > > > > > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net, > > > bool > > > frnd, uint32_t iv_index, > > > /* Kill Inter-Seg timeout */ > > > l_timeout_remove(sar_in->seg_timeout); > > > sar_in->seg_timeout = NULL; > > > + > > > + /* Start delete timer */ > > > + sar_in->delete = true; > > > + sar_in->msg_timeout = l_timeout_create(SAR_DEL, > > > + inmsg_to, net, NULL); > > > return true; > > > } > > > > > > >
Hi Brian, > Hi Isak, > > On Fri, 2022-10-07 at 13:33 +0000, Isak Westin wrote: > > Hi Brian, > > > > > Hi Isak, > > > > > > Have you run this patch through any runtime analysis (like valgrind > > > etc) to ensure it has introduced no memory leaks? > > > > > > I am particularily concerned with any l_timeout_remove() calls that > > > don't immediately set the freed timer pointer to NULL, and any new > > > l_timeout_create() calls that are not preceded with a check that > > > there is not already a valid pointer occupying the seg_timeout or > > > msg_timeout members. > > > > > > > I tested it with valgrind and found no memory leaks. However, it is > > perhaps anyway a better practice to use l_timeout_modify() ? > > If so, I will update the patch. > > What's important to remember about the l_timeout* functions is > that they do not clean-up after themselves, you must remove them > when you are done, and you need to be careful to not double-free. > > You can free and then create if you are more comfortable with that > then l_timeout_modify, but follow the rule of thumb to set the > pointers to NULL after you have freed the timer, and make sure > you free the timers before creating a new one. And free the timers > that have fired which you do not intend to restart. Many of the > timers in the SAR system never trigger if the messages are flowing > as they should, and so some potential memory leaks don't get > caught by valgrind *unless* an "abnormal" timer fire occurs. > We've had to address a lot of those. > I submitted a v3 (fixed some spelling from v2) patch now where I changed to use l_timeout_modify instead. That makes more sense imo, since the timeout is in fact being reused. I tested this patch with valgrind in PTS but also in "normal" operation with a running application. If there is something additional you would wish me to test, please let me know. > > > > > On Thu, 2022-10-06 at 16:59 +0200, Isak Westin wrote: > > > > When a SAR transmission has been completed or canceled, the > > > > recipent should store the block authentication values for at least > > > > 10 seconds and ignore new segments with the same values during > > > > this period. > > > > See > > > > MshPRFv1.0.1 section 3.5.3.4. > > > > --- > > > > mesh/net.c | 30 ++++++++++++++++++++++++++++-- > > > > 1 file changed, 28 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/mesh/net.c b/mesh/net.c index 379a6e250..e95ae5114 > > > > 100644 > > > > --- a/mesh/net.c > > > > +++ b/mesh/net.c > > > > @@ -46,6 +46,7 @@ > > > > > > > > #define SEG_TO 2 > > > > #define MSG_TO 60 > > > > +#define SAR_DEL 10 > > > > > > > > #define DEFAULT_TRANSMIT_COUNT 1 > > > > #define DEFAULT_TRANSMIT_INTERVAL 100 > > > > @@ -166,6 +167,7 @@ struct mesh_sar { > > > > bool segmented; > > > > bool frnd; > > > > bool frnd_cred; > > > > + bool delete; > > > > uint8_t ttl; > > > > uint8_t last_seg; > > > > uint8_t key_aid; > > > > @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout > > > > *seg_timeout, void *user_data) static void inmsg_to(struct > > > > l_timeout *msg_timeout, void *user_data) { > > > > struct mesh_net *net = user_data; > > > > - struct mesh_sar *sar = l_queue_remove_if(net->sar_in, > > > > + struct mesh_sar *sar = l_queue_find(net->sar_in, > > > > match_msg_timeout, msg_timeout); > > > > > > > > l_timeout_remove(msg_timeout); > > > > if (!sar) > > > > return; > > > > > > > > + if (!sar->delete) { > > > > + /* > > > > + * Incomplete timer expired, cancel SAR and start > > > > + * delete timer > > > > + */ > > > > + sar->delete = true; > > > > + l_timeout_remove(sar->seg_timeout); > > > > + sar->seg_timeout = NULL; > > > > + sar->msg_timeout = l_timeout_create(SAR_DEL, > > > > + inmsg_to, net, NULL); > > > > + return; > > > > + } > > > > + > > > > + l_queue_remove(net->sar_in, sar); > > > > sar->msg_timeout = NULL; > > > > mesh_sar_free(sar); > > > > } > > > > @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, > > > > bool frnd, uint32_t iv_index, > > > > /* Re-Send ACK for full msg */ > > > > send_net_ack(net, sar_in, expected); > > > > return true; > > > > - } > > > > + } else if (sar_in->delete) > > > > + /* Ignore canceled */ > > > > + return false; > > > > } else { > > > > uint16_t len = MAX_SEG_TO_LEN(segN); > > > > > > > > @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, > > > > bool frnd, uint32_t iv_index, > > > > sar_in->len = segN * MAX_SEG_LEN + size; > > > > > > > > if (sar_in->flags == expected) { > > > > + /* Remove message incomplete timer */ > > > > + l_timeout_remove(sar_in->msg_timeout); > > > > + > > > > /* Got it all */ > > > > send_net_ack(net, sar_in, expected); > > > > > > > > @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net, > > > > bool frnd, uint32_t iv_index, > > > > /* Kill Inter-Seg timeout */ > > > > l_timeout_remove(sar_in->seg_timeout); > > > > sar_in->seg_timeout = NULL; > > > > + > > > > + /* Start delete timer */ > > > > + sar_in->delete = true; > > > > + sar_in->msg_timeout = l_timeout_create(SAR_DEL, > > > > + inmsg_to, net, NULL); > > > > return true; > > > > } > > > > > > > > > > > >
diff --git a/mesh/net.c b/mesh/net.c index 379a6e250..e95ae5114 100644 --- a/mesh/net.c +++ b/mesh/net.c @@ -46,6 +46,7 @@ #define SEG_TO 2 #define MSG_TO 60 +#define SAR_DEL 10 #define DEFAULT_TRANSMIT_COUNT 1 #define DEFAULT_TRANSMIT_INTERVAL 100 @@ -166,6 +167,7 @@ struct mesh_sar { bool segmented; bool frnd; bool frnd_cred; + bool delete; uint8_t ttl; uint8_t last_seg; uint8_t key_aid; @@ -1493,13 +1495,27 @@ static void inseg_to(struct l_timeout *seg_timeout, void *user_data) static void inmsg_to(struct l_timeout *msg_timeout, void *user_data) { struct mesh_net *net = user_data; - struct mesh_sar *sar = l_queue_remove_if(net->sar_in, + struct mesh_sar *sar = l_queue_find(net->sar_in, match_msg_timeout, msg_timeout); l_timeout_remove(msg_timeout); if (!sar) return; + if (!sar->delete) { + /* + * Incomplete timer expired, cancel SAR and start + * delete timer + */ + sar->delete = true; + l_timeout_remove(sar->seg_timeout); + sar->seg_timeout = NULL; + sar->msg_timeout = l_timeout_create(SAR_DEL, + inmsg_to, net, NULL); + return; + } + + l_queue_remove(net->sar_in, sar); sar->msg_timeout = NULL; mesh_sar_free(sar); } @@ -1956,7 +1972,9 @@ static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index, /* Re-Send ACK for full msg */ send_net_ack(net, sar_in, expected); return true; - } + } else if (sar_in->delete) + /* Ignore canceled */ + return false; } else { uint16_t len = MAX_SEG_TO_LEN(segN); @@ -1996,6 +2014,9 @@ static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index, sar_in->len = segN * MAX_SEG_LEN + size; if (sar_in->flags == expected) { + /* Remove message incomplete timer */ + l_timeout_remove(sar_in->msg_timeout); + /* Got it all */ send_net_ack(net, sar_in, expected); @@ -2006,6 +2027,11 @@ static bool seg_rxed(struct mesh_net *net, bool frnd, uint32_t iv_index, /* Kill Inter-Seg timeout */ l_timeout_remove(sar_in->seg_timeout); sar_in->seg_timeout = NULL; + + /* Start delete timer */ + sar_in->delete = true; + sar_in->msg_timeout = l_timeout_create(SAR_DEL, + inmsg_to, net, NULL); return true; }