[5.17,180/225] zonefs: Fix management of open zones

Message ID	20220504153126.579432514@linuxfoundation.org
State	Superseded
Headers	show Return-Path: <stable-owner@kernel.org> From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable@vger.kernel.org, Damien Le Moal <damien.lemoal@opensource.wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Hans Holmberg <hans.holmberg@wdc.com> Subject: [PATCH 5.17 180/225] zonefs: Fix management of open zones Date: Wed, 4 May 2022 18:46:58 +0200 Message-Id: <20220504153126.579432514@linuxfoundation.org> In-Reply-To: <20220504153110.096069935@linuxfoundation.org> References: <20220504153110.096069935@linuxfoundation.org> User-Agent: quilt/0.66 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	None \| expand [5.17,002/225] floppy: disable FDRAWCMD by default [5.17,004/225] USB: quirks: add STRING quirk for VCOM device [5.17,005/225] USB: serial: whiteheat: fix heap overflow in WHITEHEAT_GET_DTR_RTS [5.17,006/225] USB: serial: cp210x: add PIDs for Kamstrup USB Meter Reader [5.17,009/225] usb: xhci: tegra:Fix PM usage reference leak of tegra_xusb_unpowergate_partitions [5.17,010/225] xhci: Enable runtime PM on second Alderlake controller [5.17,011/225] xhci: stop polling roothubs after shutdown [5.17,012/225] xhci: increase usb U3 -> U0 link resume timeout from 100ms to 500ms [5.17,014/225] iio: scd4x: check return of scd4x_write_and_fetch [5.17,015/225] iio: dac: ad5446: Fix read_raw not returning set value [5.17,017/225] iio: imu: inv_icm42600: Fix I2C init possible nack [5.17,020/225] usb: typec: ucsi: Fix reuse of completion structure [5.17,023/225] usb: gadget: configfs: clear deactivation flag in configfs_composite_unbind() [5.17,024/225] usb: dwc3: Try usb-role-switch first in dwc3_drd_init [5.17,026/225] usb: dwc3: core: Only handle soft-reset in DCTL [5.17,032/225] binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0 [5.17,034/225] serial: imx: fix overrun interrupts in DMA mode [5.17,035/225] serial: amba-pl011: do not time out prematurely when draining tx fifo [5.17,036/225] serial: 8250: Also set sticky MCR bits in console restoration [5.17,039/225] arch_topology: Do not set llc_sibling if llc_id is invalid [5.17,041/225] ceph: fix possible NULL pointer dereference for req->r_session [5.17,042/225] bus: mhi: host: pci_generic: Add missing poweroff() PM callback [5.17,043/225] bus: mhi: host: pci_generic: Flush recovery worker during freeze [5.17,046/225] f2fs: should not truncate blocks during roll-forward recovery [5.17,047/225] hex2bin: make the function hex_to_bin constant-time [5.17,048/225] hex2bin: fix access beyond string end [5.17,052/225] x86/pci/xen: Disable PCI/MSI[-X] masking for XEN_HVM guests [5.17,055/225] 
cpufreq: qcom-hw: drop affinity hint before freeing the IRQ [5.17,057/225] cpufreq: qcom-hw: fix the opp entries refcounting [5.17,058/225] cpufreq: qcom-cpufreq-hw: Fix throttle frequency value on EPSS platforms [5.17,059/225] video: fbdev: udlfb: properly check endpoint type [5.17,062/225] iio: dac: ad3552r: fix signedness bug in ad3552r_reset() [5.17,066/225] tee: optee: add missing mutext_destroy in optee_ffa_probe [5.17,067/225] xsk: Fix l2fwd for copy mode + busy poll combo [5.17,071/225] ARM: dts: imx6qdl-apalis: Fix sgtl5000 detection issue [5.17,073/225] arm64: dts: imx8mq-tqma8mq: change the spi-nor tx [5.17,074/225] arm64: dts: imx8mn: Fix SAI nodes [5.17,075/225] arm64: dts: meson-sm1-bananapi-m5: fix wrong GPIO pin labeling for CON1 [5.17,076/225] phy: samsung: Fix missing of_node_put() in exynos_sata_phy_probe [5.17,078/225] ARM: OMAP2+: Fix refcount leak in omap_gic_of_init [5.17,079/225] bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific [5.17,081/225] phy: ti: omap-usb2: Fix error handling in omap_usb2_enable_clocks [5.17,085/225] phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe [5.17,088/225] interconnect: qcom: sdx55: Drop IP0 interconnects [5.17,090/225] ARM: dts: am33xx-l4: Add missing touchscreen clock properties [5.17,093/225] pinctrl: qcom: sm6350: fix order of UFS & SDC pins [5.17,094/225] ipvs: correctly print the memory size of ip_vs_conn_tab [5.17,096/225] pinctrl: mediatek: moore: Fix build error [5.17,097/225] mtd: rawnand: Fix return value check of wait_for_completion_timeout [5.17,100/225] memory: renesas-rpc-if: Fix HF/OSPI data transfer in Manual Mode [5.17,102/225] netfilter: nft_set_rbtree: overlap detection with element re-addition after deletion [5.17,103/225] bpf, lwt: Fix crash when using bpf_skb_set_tunnel_key() from bpf_xmit lwt hook [5.17,106/225] pinctrl: stm32: Keep pinctrl block clock enabled when LEVEL IRQ requested [5.17,108/225] wireguard: device: check for metadata_dst with skb_valid_dst() 
[5.17,110/225] ARM: dts: imx6ull-colibri: fix vqmmc regulator [5.17,111/225] arm64: dts: imx8mn-ddr4-evk: Describe the 32.768 kHz PMIC clock [5.17,112/225] pinctrl: pistachio: fix use of irq_of_parse_and_map() [5.17,115/225] net: hns3: fix error log of tx/rx tqps stats [5.17,117/225] net: hns3: add validity check for message data length [5.17,119/225] net/smc: sync err code when tcp connection was refused [5.17,121/225] ip_gre: Make o_seqno start from 0 in native mode [5.17,124/225] tcp: fix potential xmit stalls caused by TCP_NOTSENT_LOWAT [5.17,126/225] bus: sunxi-rsb: Fix the return value of sunxi_rsb_device_create() [5.17,129/225] mctp: defer the kfree of object mdev->addrs [5.17,130/225] net: bcmgenet: hide status block before TX timestamping [5.17,131/225] net: phy: marvell10g: fix return value on error [5.17,132/225] net: dsa: mv88e6xxx: Fix port_hidden_wait to account for port_base_addr [5.17,134/225] ice: wait 5 s for EMP reset after firmware flash [5.17,135/225] Bluetooth: hci_event: Fix checking for invalid handle on error status [5.17,136/225] net: dsa: lantiq_gswip: Dont set GSWIP_MII_CFG_RMII_CLK [5.17,139/225] netfilter: nf_conntrack_tcp: re-init for syn packets only [5.17,143/225] drm/amdkfd: Fix GWS queue count [5.17,144/225] drm/amd/display: Fix memory leak in dcn21_clock_source_create [5.17,146/225] bnx2x: fix napi API usage sequence [5.17,149/225] gfs2: Make sure not to return short direct writes [5.17,150/225] gfs2: No short reads or writes upon glock contention [5.17,153/225] net: enetc: allow tc-etf offload even with NETIF_F_CSUM_MASK [5.17,155/225] tcp: fix F-RTO may not work correctly when receiving DSACK [5.17,156/225] io_uring: fix uninitialized field in rw io_kiocb [5.17,158/225] ASoC: cs35l41: Fix a shift-out-of-bounds warning found by UBSAN [5.17,161/225] ASoC: wm8731: Disable the regulator when probing fails [5.17,162/225] ASoC: Intel: sof_es8336: Add a quirk for Huawei Matebook D15 [5.17,163/225] Input: cypress-sf - register a 
callback to disable the regulators [5.17,164/225] ext4: fix bug_on in start_this_handle during umount filesystem [5.17,165/225] arch: xtensa: platforms: Fix deadlock in rs_close() [5.17,166/225] ksmbd: increment reference count of parent fp [5.17,168/225] erofs: fix use-after-free of on-stack io[] [5.17,169/225] bonding: do not discard lowest hash bit for non layer3+4 hashing [5.17,172/225] drivers: net: hippi: Fix deadlock in rr_close() [5.17,173/225] powerpc/perf: Fix 32bit compile [5.17,179/225] Revert "block: inherit request start time from bio for BLK_CGROUP" [5.17,180/225] zonefs: Fix management of open zones [5.17,181/225] zonefs: Clear inode information flags on inode creation [5.17,183/225] mtd: rawnand: qcom: fix memory corruption that causes panic [5.17,185/225] drm/amdgpu: dont runtime suspend if there are displays attached (v3) [5.17,187/225] drm/i915: Fix SEL_FETCH_PLANE_*(PIPE_B+) register addresses [5.17,188/225] net: ethernet: stmmac: fix write to sgmii_adapter_base [5.17,191/225] btrfs: fix direct I/O read repair for split bios [5.17,192/225] btrfs: fix direct I/O writes for split bios on zoned devices [5.17,193/225] btrfs: fix leaked plug after failure syncing log on zoned filesystems [5.17,198/225] perf symbol: Pass is_kallsyms to symbols__fixup_end() [5.17,203/225] tty: n_gsm: fix decoupled mux resource [5.17,204/225] tty: n_gsm: fix mux cleanup after unregister tty device [5.17,208/225] netfilter: nft_socket: only do sk lookups when indev is available [5.17,213/225] tty: n_gsm: fix wrong command frame length field encoding [5.17,216/225] tty: n_gsm: fix reset fifo race condition [5.17,219/225] tty: n_gsm: fix broken virtual tty handling [5.17,220/225] tty: n_gsm: fix invalid use of MSC in advanced option [5.17,221/225] tty: n_gsm: fix software flow control handling [5.17,222/225] tty: n_gsm: fix sometimes uninitialized warning in gsm_dlci_modem_output()

Message ID

20220504153126.579432514@linuxfoundation.org

State

Superseded

Headers

From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
        stable@vger.kernel.org,
        Damien Le Moal <damien.lemoal@opensource.wdc.com>,
        Johannes Thumshirn <johannes.thumshirn@wdc.com>,
        Hans Holmberg <hans.holmberg@wdc.com>
Subject: [PATCH 5.17 180/225] zonefs: Fix management of open zones
Date: Wed,  4 May 2022 18:46:58 +0200
Message-Id: <20220504153126.579432514@linuxfoundation.org>
In-Reply-To: <20220504153110.096069935@linuxfoundation.org>
References: <20220504153110.096069935@linuxfoundation.org>
User-Agent: quilt/0.66
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

None | expand

Commit Message

Greg Kroah-Hartman May 4, 2022, 4:46 p.m. UTC

From: Damien Le Moal <damien.lemoal@opensource.wdc.com>

commit 1da18a296f5ba4f99429e62a7cf4fdbefa598902 upstream.

The mount option "explicit_open" manages the device open zone
resources so that when an application opens a sequential file for
writing, the file's zone can always be written: the zone is explicitly
opened and that state is accounted for with the s_open_zones counter.

However, if some zones are already open when mounting, the device's
open zone resource usage will be larger than the initial s_open_zones
value of 0. Prevent this inconsistency by closing any sequential zone
that is open when mounting.

Furthermore, with ZNS drives, closing an explicitly open zone that has
not been written will change the zone state to "closed", that is, the
zone will remain in an active state. Since this can then cause failures
of explicit open operations on other zones if the drive active zone
resources are exceeded, we need to make sure that the zone is not
active anymore by resetting it instead of closing it. To address this,
zonefs_zone_mgmt() is modified to change a REQ_OP_ZONE_CLOSE request
into a REQ_OP_ZONE_RESET for sequential zones that have not been
written.

Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close")
Cc: <stable@vger.kernel.org>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/zonefs/super.c |   45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -35,6 +35,17 @@  static inline int zonefs_zone_mgmt(struc
 
 	lockdep_assert_held(&zi->i_truncate_mutex);
 
+	/*
+	 * With ZNS drives, closing an explicitly open zone that has not been
+	 * written will change the zone state to "closed", that is, the zone
+	 * will remain active. Since this can then cause failure of explicit
+	 * open operation on other zones if the drive active zone resources
+	 * are exceeded, make sure that the zone does not remain active by
+	 * resetting it.
+	 */
+	if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
+		op = REQ_OP_ZONE_RESET;
+
 	trace_zonefs_zone_mgmt(inode, op);
 	ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
 			       zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
@@ -1295,12 +1306,13 @@  static void zonefs_init_dir_inode(struct
 	inc_nlink(parent);
 }
 
-static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
-				   enum zonefs_ztype type)
+static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+				  enum zonefs_ztype type)
 {
 	struct super_block *sb = inode->i_sb;
 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	int ret = 0;
 
 	inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
 	inode->i_mode = S_IFREG | sbi->s_perm;
@@ -1325,6 +1337,22 @@  static void zonefs_init_file_inode(struc
 	sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
 	sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
 	sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
+
+	/*
+	 * For sequential zones, make sure that any open zone is closed first
+	 * to ensure that the initial number of open zones is 0, in sync with
+	 * the open zone accounting done when the mount option
+	 * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
+	 */
+	if (type == ZONEFS_ZTYPE_SEQ &&
+	    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
+	     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
+		mutex_lock(&zi->i_truncate_mutex);
+		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+		mutex_unlock(&zi->i_truncate_mutex);
+	}
+
+	return ret;
 }
 
 static struct dentry *zonefs_create_inode(struct dentry *parent,
@@ -1334,6 +1362,7 @@  static struct dentry *zonefs_create_inod
 	struct inode *dir = d_inode(parent);
 	struct dentry *dentry;
 	struct inode *inode;
+	int ret;
 
 	dentry = d_alloc_name(parent, name);
 	if (!dentry)
@@ -1344,10 +1373,16 @@  static struct dentry *zonefs_create_inod
 		goto dput;
 
 	inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
-	if (zone)
-		zonefs_init_file_inode(inode, zone, type);
-	else
+	if (zone) {
+		ret = zonefs_init_file_inode(inode, zone, type);
+		if (ret) {
+			iput(inode);
+			goto dput;
+		}
+	} else {
 		zonefs_init_dir_inode(dir, inode, type);
+	}
+
 	d_add(dentry, inode);
 	dir->i_size++;

[5.17,180/225] zonefs: Fix management of open zones

Commit Message

Patch