diff mbox series

[RFC] i2c: Fix deadlock on adapter removal

Message ID 71a49125-54a7-4705-b54a-a5e7cc54a78b@gmail.com
State Superseded
Headers show
Series [RFC] i2c: Fix deadlock on adapter removal | expand

Commit Message

Heiner Kallweit Feb. 4, 2025, 7:54 p.m. UTC
i2c_del_adapter() can be called recursively if it has an i2c mux on
the bus. This results in a deadlock.
We use the lock to protect from parallel unregistering of clients in
case driver and adapter are removed at the same time.
The fix approach is based on the fact that the used iterators are
klist-based. So it's safe to remove list elements during the iteration,
and we don't have to acquire the core lock.
As a result we just have to prevent that i2c_unregister_device() is
executed in parallel for the same client. Use an atomic bit op for this
purpose.

Fixes: 56a50667cbcf ("i2c: Replace list-based mechanism for handling auto-detected clients")
Reported-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/i2c/i2c-core-base.c | 9 +++------
 include/linux/i2c.h         | 3 ++-
 2 files changed, 5 insertions(+), 7 deletions(-)

Comments

Heiner Kallweit Feb. 5, 2025, 1:50 p.m. UTC | #1
On 05.02.2025 09:07, Herve Codina wrote:
> Hi Heiner,
> 
> Cc Luca and Thomas (they are interested in the issue).
> 
> On Tue, 4 Feb 2025 20:54:57 +0100
> Heiner Kallweit <hkallweit1@gmail.com> wrote:
> 
>> i2c_del_adapter() can be called recursively if it has an i2c mux on
>> the bus. This results in a deadlock.
>> We use the lock to protect from parallel unregistering of clients in
>> case driver and adapter are removed at the same time.
>> The fix approach is based on the fact that the used iterators are
>> klist-based. So it's safe to remove list elements during the iteration,
>> and we don't have to acquire the core lock.
>> As a result we just have to prevent that i2c_unregister_device() is
>> executed in parallel for the same client. Use an atomic bit op for this
>> purpose.
>>
>> Fixes: 56a50667cbcf ("i2c: Replace list-based mechanism for handling auto-detected clients")
>> Reported-by: Herve Codina <herve.codina@bootlin.com>
>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> 
> I tested the patch.
> 
> The deadlock is no more present but I have this new issue using the exact
> same command as the one I used to detect the deadlock:
> 
>     # echo 30a40000.i2c > /sys/bus/platform/drivers/imx-i2c/unbind
>     [   35.221250] Unable to handle ker
>     ** replaying previous printk message **
>     [   35.221250] Unable to handle kernel paging request at virtual address 70ffff8000818b85
>     [   35.221298] Mem abort info:
>     [   35.221330]   ESR = 0x0000000096000005
>     [   35.221347]   EC = 0x25: DABT (current EL), IL = 32 bits
>     [   35.221364]   SET = 0, FnV = 0
>     [   35.221381]   EA = 0, S1PTW = 0
>     [   35.221398]   FSC = 0x05: level 1 translation fault
>     [   35.221415] Data abort info:
>     [   35.221428]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
>     [   35.221445]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
>     [   35.221464]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
>     [   35.221484] [70ffff8000818b85] address between user and kernel address ranges
>     [   35.221806] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
>     [   35.291401] Modules linked in: fsl_ldb imx8mp_interconnect imx_interconnect leds_pca963x imx_cpufreq_dt imx8mm_thermal lm75 tmp103 rtc_snvs     snvs_pwrkey rtc_rs5c372 pwm_imx27 st_pressure_spi st_sensors_spi regmap_spi st_pressure_i2c st_pressure st_sensors_i2c industrialio_triggered_buffer kfifo_buf st_sensors iio_hwmon gpio_charger led_bl panel_simple opt3001 governor_userspace imx_bus imx8mp_hdmi_tx dw_hdmi dwmac_imx stmmac_platform stmmac pcs_xpcs phylink samsung_dsim imx_sdma imx_lcdif drm_dma_helper imx8mp_hdmi_pvi fsl_imx8_ddr_perf caam exc3000 error pwm_bl ti_sn65dsi83 hotplug_bridge drm_display_helper drm_kms_helper drm drm_panel_orientation_quirks backlight gehc_sunh_connector
>     [   35.352122] CPU: 1 UID: 0 PID: 294 Comm: sh Not tainted 6.14.0-rc1+ #24
>     [   35.358743] Hardware name: HCO board (DT)
>     [   35.365273] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>     [   35.372239] pc : sysfs_remove_groups+0x20/0x70
>     [   35.376695] lr : device_remove_attrs+0xb8/0x100
>     [   35.381237] sp : ffff8000834db9a0
>     [   35.384551] x29: ffff8000834db9a0 x28: ffff000000e58000 x27: 0000000000000000
>     [   35.391700] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
>     [   35.398850] x23: ffff00007bc611b0 x22: ffff800080c4fb01 x21: 0000000000000000
>     [   35.405997] x20: 70ffff8000818b85 x19: ffff00007bc72028 x18: 0000000000000000
>     [   35.413144] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000028
>     [   35.420290] x14: 0000000000000000 x13: 0000000000008d0d x12: ffff000000e58918
>     [   35.427437] x11: ffff8000820618a8 x10: 0000000000000000 x9 : ffff800080733d08
>     [   35.434585] x8 : 0101010101010101 x7 : 7f7f7f7f7f7f7f7f x6 : 647165746f623200
>     [   35.441733] x5 : ffff000000e58000 x4 : 0000000029dd16fe x3 : 0000000000000000
>     [   35.448881] x2 : 0000000000000000 x1 : 70ffff8000818b85 x0 : ffff00007bc72028
>     [   35.456030] Call trace:
>     [   35.458478]  sysfs_remove_groups+0x20/0x70 (P)
>     [   35.462932]  device_remove_attrs+0xb8/0x100
>     [   35.467123]  device_del+0x144/0x410
>     [   35.470616]  device_unregister+0x20/0x48
>     [   35.474544]  i2c_unregister_device.part.0+0x64/0xc0
>     [   35.479433]  __unregister_client+0x74/0x80
>     [   35.483539]  device_for_each_child+0x68/0xc8
>     [   35.487818]  i2c_del_adapter+0xac/0x198
>     [   35.491663]  i2c_imx_remove+0x4c/0x190
>     [   35.495419]  platform_remove+0x30/0x58
>     [   35.499179]  device_remove+0x54/0x90
>     [   35.502762]  device_release_driver_internal+0x1e8/0x250
>     [   35.507994]  device_driver_detach+0x20/0x38
>     [   35.512185]  unbind_store+0xbc/0xc8
>     [   35.515680]  drv_attr_store+0x2c/0x48
>     [   35.519349]  sysfs_kf_write+0x54/0x88
>     [   35.523018]  kernfs_fop_write_iter+0x128/0x1c8
>     [   35.527468]  vfs_write+0x290/0x398
>     [   35.530876]  ksys_write+0x70/0x110
>     [   35.534284]  __arm64_sys_write+0x24/0x38
>     [   35.538212]  invoke_syscall+0x50/0x120
>     [   35.541972]  el0_svc_common.constprop.0+0x48/0xf8
>     [   35.546686]  do_el0_svc+0x28/0x40
>     [   35.550009]  el0_svc+0x48/0x110
>     [   35.553159]  el0t_64_sync_handler+0x144/0x168
>     [   35.557522]  el0t_64_sync+0x198/0x1a0
>     [   35.561198] Code: a9bd7bfd 910003fd a90153f3 aa0103f4 (f9400021) 
>     [   35.567295] ---[ end trace 0000000000000000 ]---
> 
> I really don't know if this new issue is related to 56a50667cbcf ("i2c:
> Replace list-based mechanism for handling auto-detected clients") or some
> other patches but for sure it was not present in v6.13.
> 
> Can you have a look?
> 

Dumb error on my side. I2C_CLIENT_UNREGISTERING has to be a bit number.
Just sent a v2 of the patch.

> Feel free to ask for more information or tests I can do to help if needed.
> 
> Best regards,
> Hervé
diff mbox series

Patch

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 6eade239f..44b861eb4 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1059,6 +1059,9 @@  void i2c_unregister_device(struct i2c_client *client)
 	if (IS_ERR_OR_NULL(client))
 		return;
 
+	if (test_and_set_bit(I2C_CLIENT_UNREGISTERING, &client->flags))
+		return;
+
 	if (client->dev.of_node) {
 		of_node_clear_flag(client->dev.of_node, OF_POPULATED);
 		of_node_put(client->dev.of_node);
@@ -1354,7 +1357,6 @@  delete_device_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 	}
 
-	mutex_lock(&core_lock);
 	/* Make sure the device was added through sysfs */
 	child_dev = device_find_child(&adap->dev, &addr, __i2c_find_user_addr);
 	if (child_dev) {
@@ -1364,7 +1366,6 @@  delete_device_store(struct device *dev, struct device_attribute *attr,
 		dev_err(dev, "Can't find userspace-created device at %#x\n", addr);
 		count = -ENOENT;
 	}
-	mutex_unlock(&core_lock);
 
 	return count;
 }
@@ -1745,10 +1746,8 @@  void i2c_del_adapter(struct i2c_adapter *adap)
 	 * we can't remove the dummy devices during the first pass: they
 	 * could have been instantiated by real devices wishing to clean
 	 * them up properly, so we give them a chance to do that first. */
-	mutex_lock(&core_lock);
 	device_for_each_child(&adap->dev, NULL, __unregister_client);
 	device_for_each_child(&adap->dev, NULL, __unregister_dummy);
-	mutex_unlock(&core_lock);
 
 	/* device name is gone after device_unregister */
 	dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name);
@@ -2002,12 +2001,10 @@  static int __i2c_unregister_detected_client(struct device *dev, void *argp)
  */
 void i2c_del_driver(struct i2c_driver *driver)
 {
-	mutex_lock(&core_lock);
 	/* Satisfy __must_check, function can't fail */
 	if (driver_for_each_device(&driver->driver, NULL, NULL,
 				   __i2c_unregister_detected_client)) {
 	}
-	mutex_unlock(&core_lock);
 
 	driver_unregister(&driver->driver);
 	pr_debug("driver [%s] unregistered\n", driver->driver.name);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index c31fd1dba..c9b6c0f50 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -325,7 +325,7 @@  struct i2c_driver {
  * managing the device.
  */
 struct i2c_client {
-	unsigned short flags;		/* div., see below		*/
+	unsigned long flags;		/* div., see below		*/
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
 					/* Must equal I2C_M_TEN below */
@@ -334,6 +334,7 @@  struct i2c_client {
 #define I2C_CLIENT_WAKE		0x80	/* for board_info; true iff can wake */
 #define I2C_CLIENT_AUTO		0x100	/* client was auto-detected */
 #define I2C_CLIENT_USER		0x200	/* client was userspace-created */
+#define I2C_CLIENT_UNREGISTERING 0x400	/* client is being unregistered */
 #define I2C_CLIENT_SCCB		0x9000	/* Use Omnivision SCCB protocol */
 					/* Must match I2C_M_STOP|IGNORE_NAK */