Message ID | 20201127133307.2969817-1-steen.hegelund@microchip.com |
---|---|
Headers | show |
Series | net: Adding the Sparx5 Switch Driver | expand |
This is a very large driver, which is going to make it slow to review. > +static int sparx5_probe_port(struct sparx5 *sparx5, > + struct device_node *portnp, > + struct phy *serdes, > + u32 portno, > + struct sparx5_port_config *conf) > +{ > + phy_interface_t phy_mode = conf->phy_mode; > + struct sparx5_port *spx5_port; > + struct net_device *ndev; > + struct phylink *phylink; > + int err; > + > + err = sparx5_create_targets(sparx5); > + if (err) > + return err; > + ndev = sparx5_create_netdev(sparx5, portno); > + if (IS_ERR(ndev)) { > + dev_err(sparx5->dev, "Could not create net device: %02u\n", portno); > + return PTR_ERR(ndev); > + } > + spx5_port = netdev_priv(ndev); > + spx5_port->of_node = portnp; > + spx5_port->serdes = serdes; > + spx5_port->pvid = NULL_VID; > + spx5_port->signd_internal = true; > + spx5_port->signd_active_high = true; > + spx5_port->signd_enable = true; > + spx5_port->flow_control = false; > + spx5_port->max_vlan_tags = SPX5_PORT_MAX_TAGS_NONE; > + spx5_port->vlan_type = SPX5_VLAN_PORT_TYPE_UNAWARE; > + spx5_port->custom_etype = 0x8880; /* Vitesse */ > + conf->portmode = conf->phy_mode; > + spx5_port->conf.speed = SPEED_UNKNOWN; > + spx5_port->conf.power_down = true; > + sparx5->ports[portno] = spx5_port; > +struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) > +{ > + struct net_device *ndev; > + struct sparx5_port *spx5_port; > + int err; > + > + ndev = devm_alloc_etherdev(sparx5->dev, sizeof(struct sparx5_port)); > + if (!ndev) > + return ERR_PTR(-ENOMEM); > + ... > + err = register_netdev(ndev); > + if (err) { > + dev_err(sparx5->dev, "netdev registration failed\n"); > + return ERR_PTR(err); > + } This is one of the classic bugs in network drivers. As soon as you call register_netdev() the interface is live. The network stack can start using it. But you have not finished initialzing spx5_port. So bad things are going to happen. Andrew
> +/* Add a potentially wrapping 32 bit value to a 64 bit counter */ > +static inline void sparx5_update_counter(u64 *cnt, u32 val) > +{ > + if (val < (*cnt & U32_MAX)) > + *cnt += (u64)1 << 32; /* value has wrapped */ > + > + *cnt = (*cnt & ~(u64)U32_MAX) + val; > +} I don't follow what this is doing. Could you give some examples? > +static const char *const sparx5_stats_layout[] = { > + "rx_in_bytes", > + "rx_symbol_err", > + "rx_pause", > + "rx_unsup_opcode", > +static void sparx5_update_port_stats(struct sparx5 *sparx5, int portno) > +{ > + struct sparx5_port *spx5_port = sparx5->ports[portno]; > + bool high_speed_dev = sparx5_is_high_speed_device(&spx5_port->conf); Reverse christmas tree. Which in this case, means you need to move the assignment into the body of the code. > +static void sparx5_get_sset_strings(struct net_device *ndev, u32 sset, u8 *data) > +{ > + struct sparx5_port *port = netdev_priv(ndev); > + struct sparx5 *sparx5 = port->sparx5; > + int idx; > + > + if (sset != ETH_SS_STATS) > + return; > + > + for (idx = 0; idx < sparx5->num_stats; idx++) > + memcpy(data + idx * ETH_GSTRING_LEN, > + sparx5->stats_layout[idx], ETH_GSTRING_LEN); You cannot use memcpy here, because the strings you have defined are not ETH_GSTRING_LEN long. We once had a driver which happened to have its strings at the end of a page. The memcpy would copy the string, but keep going passed the end of string, over the page boundary, and trigger a segmentation fault. Andrew
> +static int sparx5_port_open(struct net_device *ndev) > +{ > + struct sparx5_port *port = netdev_priv(ndev); > + int err = 0; > + > + sparx5_port_enable(port, true); > + if (port->conf.phy_mode != PHY_INTERFACE_MODE_NA) { > + err = phylink_of_phy_connect(port->phylink, port->of_node, 0); > + if (err) { > + netdev_err(ndev, "Could not attach to PHY\n"); > + return err; > + } > + } This looks a bit odd. PHY_INTERFACE_MODE_NA means don't touch, something else has already configured the MAC-PHY mode in the PHY. You should not not connect the PHY because of this. > +void sparx5_destroy_netdev(struct sparx5 *sparx5, struct sparx5_port *port) > +{ > + if (port->phylink) { > + /* Disconnect the phy */ > + if (rtnl_trylock()) { Why do you use rtnl_trylock()? Andrew
> +static void sparx5_phylink_mac_config(struct phylink_config *config, > + unsigned int mode, > + const struct phylink_link_state *state) > +{ > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > + struct sparx5_port_config conf; > + int err = 0; > + > + conf = port->conf; > + conf.autoneg = state->an_enabled; > + conf.pause = state->pause; > + conf.duplex = state->duplex; > + conf.power_down = false; > + conf.portmode = state->interface; > + > + if (state->speed == SPEED_UNKNOWN) { > + /* When a SFP is plugged in we use capabilities to > + * default to the highest supported speed > + */ This looks suspicious. Russell, please could you look through this? Andrew
> +static void sparx5_attr_stp_state_set(struct sparx5_port *port, > + struct switchdev_trans *trans, > + u8 state) > +{ > + struct sparx5 *sparx5 = port->sparx5; > + > + if (!test_bit(port->portno, sparx5->bridge_mask)) { > + netdev_err(port->ndev, > + "Controlling non-bridged port %d?\n", port->portno); > + return; > + } > + > + switch (state) { > + case BR_STATE_FORWARDING: > + set_bit(port->portno, sparx5->bridge_fwd_mask); > + break; > + default: > + clear_bit(port->portno, sparx5->bridge_fwd_mask); > + break; > + } That is pretty odd. What about listening, learning, blocking? > +static int sparx5_port_bridge_join(struct sparx5_port *port, > + struct net_device *bridge) > +{ > + struct sparx5 *sparx5 = port->sparx5; > + > + if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) > + /* First bridged port */ > + sparx5->hw_bridge_dev = bridge; > + else > + if (sparx5->hw_bridge_dev != bridge) > + /* This is adding the port to a second bridge, this is > + * unsupported > + */ > + return -ENODEV; > + > + set_bit(port->portno, sparx5->bridge_mask); > + > + /* Port enters in bridge mode therefor don't need to copy to CPU > + * frames for multicast in case the bridge is not requesting them > + */ > + __dev_mc_unsync(port->ndev, sparx5_mc_unsync); > + > + return 0; > +} This looks suspiciously empty? Don't you need to tell the hardware which ports this port is bridges to? Normally you see some code which walks all the ports and finds those in the same bridge, and sets a bit which allows these ports to talk to each other. Is that code somewhere else? Andrew
On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > + unsigned int mode, > > + const struct phylink_link_state *state) > > +{ > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > + struct sparx5_port_config conf; > > + int err = 0; > > + > > + conf = port->conf; > > + conf.autoneg = state->an_enabled; > > + conf.pause = state->pause; > > + conf.duplex = state->duplex; > > + conf.power_down = false; > > + conf.portmode = state->interface; > > + > > + if (state->speed == SPEED_UNKNOWN) { > > + /* When a SFP is plugged in we use capabilities to > > + * default to the highest supported speed > > + */ > > This looks suspicious. > > Russell, please could you look through this? Maybe if I was copied on the patch submission... I don't have the patches, and searching google for them is a faff, especially when site:kernel.org 20201127133307.2969817-1-steen.hegelund@microchip.com gives: Your search - site:kernel.org 20201127133307.2969817-1-steen.hegelund@microchip.com - did not match any documents. Suggestions: Make sure that all words are spelled correctly. Try different keywords. Try more general keywords. It seems that the modified MAINTAINERS entry is now annoyingly missing stuff. I don't know what the solution is - either I get irrelevant stuff or I don't get stuff I should. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
Hi Russell, On 28/11/2020 19:37:07+0000, Russell King - ARM Linux admin wrote: > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > > + unsigned int mode, > > > + const struct phylink_link_state *state) > > > +{ > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > > + struct sparx5_port_config conf; > > > + int err = 0; > > > + > > > + conf = port->conf; > > > + conf.autoneg = state->an_enabled; > > > + conf.pause = state->pause; > > > + conf.duplex = state->duplex; > > > + conf.power_down = false; > > > + conf.portmode = state->interface; > > > + > > > + if (state->speed == SPEED_UNKNOWN) { > > > + /* When a SFP is plugged in we use capabilities to > > > + * default to the highest supported speed > > > + */ > > > > This looks suspicious. > > > > Russell, please could you look through this? > > Maybe if I was copied on the patch submission... I don't have the > patches, and searching google for them is a faff, especially > when > > site:kernel.org 20201127133307.2969817-1-steen.hegelund@microchip.com > > gives: > > Your search - site:kernel.org > 20201127133307.2969817-1-steen.hegelund@microchip.com - did not > match any documents. Suggestions: Make sure that all words are > spelled correctly. Try different keywords. Try more general > keywords. > http://lore.kernel.org/r/20201127133307.2969817-1-steen.hegelund@microchip.com does the right redirect. -- Alexandre Belloni, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
> > Maybe if I was copied on the patch submission... I don't have the > > patches, and searching google for them is a faff, especially > > when > > > > site:kernel.org 20201127133307.2969817-1-steen.hegelund@microchip.com > > > > gives: > > > > Your search - site:kernel.org > > 20201127133307.2969817-1-steen.hegelund@microchip.com - did not > > match any documents. Suggestions: Make sure that all words are > > spelled correctly. Try different keywords. Try more general > > keywords. > > > > http://lore.kernel.org/r/20201127133307.2969817-1-steen.hegelund@microchip.com > does the right redirect. b4 mbox 20201127133307.2969817-1-steen.hegelund@microchip.com Also seems to work, and gives you it in mbox format, which mutt should understand. b4 is a standard part of Debian now. Andrew
On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > + unsigned int mode, > > + const struct phylink_link_state *state) > > +{ > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > + struct sparx5_port_config conf; > > + int err = 0; > > + > > + conf = port->conf; > > + conf.autoneg = state->an_enabled; > > + conf.pause = state->pause; > > + conf.duplex = state->duplex; > > + conf.power_down = false; > > + conf.portmode = state->interface; > > + > > + if (state->speed == SPEED_UNKNOWN) { > > + /* When a SFP is plugged in we use capabilities to > > + * default to the highest supported speed > > + */ > > This looks suspicious. Yes, it looks highly suspicious. The fact that sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() does all the work suggests that this was developed before the phylink re-organisation, and this code hasn't been updated for it. Any new code for the kernel really ought to be updated for the new phylink methodology before it is accepted. Looking at sparx5_port_config(), it also seems to use PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All very well for the driver to do that internally, but it's confusing when it comes to reviewing this stuff, especially when people outside of the driver (such as myself) reviewing it need to understand what's going on with the configuration. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > > + unsigned int mode, > > > + const struct phylink_link_state *state) > > > +{ > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > > + struct sparx5_port_config conf; > > > + int err = 0; > > > + > > > + conf = port->conf; > > > + conf.autoneg = state->an_enabled; > > > + conf.pause = state->pause; > > > + conf.duplex = state->duplex; > > > + conf.power_down = false; > > > + conf.portmode = state->interface; > > > + > > > + if (state->speed == SPEED_UNKNOWN) { > > > + /* When a SFP is plugged in we use capabilities to > > > + * default to the highest supported speed > > > + */ > > > > This looks suspicious. > > Yes, it looks highly suspicious. The fact that > sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() > does all the work suggests that this was developed before the phylink > re-organisation, and this code hasn't been updated for it. > > Any new code for the kernel really ought to be updated for the new > phylink methodology before it is accepted. > > Looking at sparx5_port_config(), it also seems to use > PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All > very well for the driver to do that internally, but it's confusing > when it comes to reviewing this stuff, especially when people outside > of the driver (such as myself) reviewing it need to understand what's > going on with the configuration. There are other issues too. Looking at sparx5_get_1000basex_status(), we have: + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); Why is the link status the logical OR of these? + if ((lp_abil >> 8) & 1) /* symmetric pause */ + status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX; + if (lp_abil & (1 << 7)) /* asymmetric pause */ + status->pause |= MLO_PAUSE_RX; is actually wrong, and I see I need to improve the documentation for mac_pcs_get_state(). The intention in the documentation was concerning hardware that indicated the _resolved_ status of pause modes. It was not intended that drivers resolve the pause modes themselves. Even so, the above is still wrong; it takes no account of what is being advertised at the local end. If one looks at the implementation in phylink_decode_c37_word(), one will notice there is code to deal with this. I think we ought to make phylink_decode_c37_word() and phylink_decode_sgmii_word() public functions, and then this driver can use these helpers to decode the link partner advertisement to the phylink state. Does the driver need to provide an ethtool .get_link function? That seems to bypass phylink. Why can't ethtool_op_get_link() be used? I think if ethtool_op_get_link() is used, we then have just one caller for sparx5_get_port_status(), which means "struct sparx5_port_status" can be eliminated and the code cleaned up to use the phylink decoding helpers. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote: > On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: > > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > > > + unsigned int mode, > > > > + const struct phylink_link_state *state) > > > > +{ > > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > > > + struct sparx5_port_config conf; > > > > + int err = 0; > > > > + > > > > + conf = port->conf; > > > > + conf.autoneg = state->an_enabled; > > > > + conf.pause = state->pause; > > > > + conf.duplex = state->duplex; > > > > + conf.power_down = false; > > > > + conf.portmode = state->interface; > > > > + > > > > + if (state->speed == SPEED_UNKNOWN) { > > > > + /* When a SFP is plugged in we use capabilities to > > > > + * default to the highest supported speed > > > > + */ > > > > > > This looks suspicious. > > > > Yes, it looks highly suspicious. The fact that > > sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() > > does all the work suggests that this was developed before the phylink > > re-organisation, and this code hasn't been updated for it. > > > > Any new code for the kernel really ought to be updated for the new > > phylink methodology before it is accepted. > > > > Looking at sparx5_port_config(), it also seems to use > > PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All > > very well for the driver to do that internally, but it's confusing > > when it comes to reviewing this stuff, especially when people outside > > of the driver (such as myself) reviewing it need to understand what's > > going on with the configuration. > > There are other issues too. > > Looking at sparx5_get_1000basex_status(), we have: > > + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | > + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); > > Why is the link status the logical OR of these? > > + if ((lp_abil >> 8) & 1) /* symmetric pause */ > + status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX; > + if (lp_abil & (1 << 7)) /* asymmetric pause */ > + status->pause |= MLO_PAUSE_RX; > > is actually wrong, and I see I need to improve the documentation for > mac_pcs_get_state(). The intention in the documentation was concerning > hardware that indicated the _resolved_ status of pause modes. It was > not intended that drivers resolve the pause modes themselves. > > Even so, the above is still wrong; it takes no account of what is being > advertised at the local end. If one looks at the implementation in > phylink_decode_c37_word(), one will notice there is code to deal with > this. > > I think we ought to make phylink_decode_c37_word() and > phylink_decode_sgmii_word() public functions, and then this driver can > use these helpers to decode the link partner advertisement to the > phylink state. > > Does the driver need to provide an ethtool .get_link function? That > seems to bypass phylink. Why can't ethtool_op_get_link() be used? > > I think if ethtool_op_get_link() is used, we then have just one caller > for sparx5_get_port_status(), which means "struct sparx5_port_status" > can be eliminated and the code cleaned up to use the phylink decoding > helpers. (Sorry, I keep spotting bits in the code - it's really not an easy chunk of code to review.) I'm also not sure that this is really correct: + status->serdes_link = !phy_validate(port->serdes, PHY_MODE_ETHERNET, + port->conf.portmode, NULL); The documentation for phy_validate() says: * Used to check that the current set of parameters can be handled by * the phy. Implementations are free to tune the parameters passed as * arguments if needed by some implementation detail or * constraints. It will not change any actual configuration of the * PHY, so calling it as many times as deemed fit will have no side * effect. and clearly, passing NULL for opts, gives the function no opportunity to do what it's intended, so phy_validate() is being used for some other purpose than that which the drivers/phy subsystem intends it to be used for. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote:
> There are other issues too.
This is also wrong:
+ if (port->ndev && port->ndev->phydev)
+ status->link = port->ndev->phydev->link;
phylink already deals with that situation.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
> diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c > new file mode 100644 > index 000000000000..a91dd9532f1c > --- /dev/null > +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c > @@ -0,0 +1,1027 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* Microchip Sparx5 Switch driver > + * > + * Copyright (c) 2020 Microchip Technology Inc. and its subsidiaries. > + */ > + > +#include <linux/ethtool.h> > + > +#include "sparx5_main.h" > +#include "sparx5_port.h" > + > +/* Add a potentially wrapping 32 bit value to a 64 bit counter */ > +static inline void sparx5_update_counter(u64 *cnt, u32 val) > +{ > + if (val < (*cnt & U32_MAX)) > + *cnt += (u64)1 << 32; /* value has wrapped */ > + > + *cnt = (*cnt & ~(u64)U32_MAX) + val; > +} No inline functions in C files. Let the compiler decide. And i now think i get what this is doing. But i'm surprised at the hardware. Normally registers like this which are expected to wrap around, reset to 0 on read. Andrew
> +static void sparx5_hw_lock(struct sparx5 *sparx5) > +{ > + mutex_lock(&sparx5->lock); > +} > + > +static void sparx5_hw_unlock(struct sparx5 *sparx5) > +{ > + mutex_unlock(&sparx5->lock); > +} Why is this mutex special and gets a wrapper where as the other two don't? If there is no reason for the wrapper, please remove it. Andrew
> +#define SPX5_RD_(sparx5, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + readl(spx5_addr((sparx5)->regs, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth)) > + > +#define SPX5_INST_RD_(iomem, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + readl(spx5_inst_addr(iomem, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth)) > + > +#define SPX5_WR_(val, sparx5, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + writel(val, spx5_addr((sparx5)->regs, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth)) > + > +#define SPX5_INST_WR_(val, iomem, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + writel(val, spx5_inst_addr(iomem, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth)) > + > +#define SPX5_RMW_(val, mask, sparx5, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + do { \ > + u32 _v_; \ > + u32 _m_ = mask; \ > + void __iomem *addr = \ > + spx5_addr((sparx5)->regs, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth); \ > + _v_ = readl(addr); \ > + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ > + writel(_v_, addr); \ > + } while (0) > + > +#define SPX5_INST_RMW_(val, mask, iomem, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + do { \ > + u32 _v_; \ > + u32 _m_ = mask; \ > + void __iomem *addr = \ > + spx5_inst_addr(iomem, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth); \ > + _v_ = readl(addr); \ > + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ > + writel(_v_, addr); \ > + } while (0) > + > +#define SPX5_REG_RD_(regaddr) \ > + readl(regaddr) > + > +#define SPX5_REG_WR_(val, regaddr) \ > + writel(val, regaddr) > + > +#define SPX5_REG_RMW_(val, mask, regaddr) \ > + do { \ > + u32 _v_; \ > + u32 _m_ = mask; \ > + void __iomem *_r_ = regaddr; \ > + _v_ = readl(_r_); \ > + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ > + writel(_v_, _r_); \ > + } while (0) > + > +#define SPX5_REG_GET_(sparx5, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) \ > + spx5_addr((sparx5)->regs, id, tinst, tcnt, \ > + gbase, ginst, gcnt, gwidth, \ > + raddr, rinst, rcnt, rwidth) > + > +#define SPX5_RD(...) SPX5_RD_(__VA_ARGS__) > +#define SPX5_WR(...) SPX5_WR_(__VA_ARGS__) > +#define SPX5_RMW(...) SPX5_RMW_(__VA_ARGS__) > +#define SPX5_INST_RD(...) SPX5_INST_RD_(__VA_ARGS__) > +#define SPX5_INST_WR(...) SPX5_INST_WR_(__VA_ARGS__) > +#define SPX5_INST_RMW(...) SPX5_INST_RMW_(__VA_ARGS__) > +#define SPX5_INST_GET(sparx5, id, tinst) ((sparx5)->regs[(id) + (tinst)]) > +#define SPX5_REG_RMW(...) SPX5_REG_RMW_(__VA_ARGS__) > +#define SPX5_REG_WR(...) SPX5_REG_WR_(__VA_ARGS__) > +#define SPX5_REG_RD(...) SPX5_REG_RD_(__VA_ARGS__) > +#define SPX5_REG_GET(...) SPX5_REG_GET_(__VA_ARGS__) I don't see any reason for macro magic here. If this just left over from HAL code? Please turn this all into functions. Andrew
On 27.11.2020 18:00, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> + reg-names: >> + minItems: 153 >> + items: >> + - const: dev2g5_0 >> + - const: dev5g_0 >> + - const: pcs5g_br_0 >> + - const: dev2g5_1 >> + - const: dev5g_1 >... >> + - const: ana_ac >> + - const: vop > >> + switch: switch@600000000 { >> + compatible = "microchip,sparx5-switch"; >> + reg = <0x10004000 0x4000>, /* dev2g5_0 */ >> + <0x10008000 0x4000>, /* dev5g_0 */ >> + <0x1000c000 0x4000>, /* pcs5g_br_0 */ >> + <0x10010000 0x4000>, /* dev2g5_1 */ >> + <0x10014000 0x4000>, /* dev5g_1 */ > >... > >> + <0x11800000 0x100000>, /* ana_l2 */ >> + <0x11900000 0x100000>, /* ana_ac */ >> + <0x11a00000 0x100000>; /* vop */ > >This is a pretty unusual binding. > >Why is it not > >reg = <0x10004000 0x1af8000> > >and the driver can then break up the memory into its sub ranges? > > Andrew Hi Andrew, Since the targets used by the driver is not always in the natural address order (e.g. the dev2g5_x targets), I thought it best to let the DT take care of this since this cannot be probed. I am aware that this causes extra mappings compared to the one-range strategy, but this layout seems more transparent to me, also when mapped over PCIe. BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 27.11.2020 18:15, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >This is a very large driver, which is going to make it slow to review. Hi Andrew, Yes I am aware of that, but I think that what is available with this series, makes for a nice package that can be tested by us, and used by our customers. > >> +static int sparx5_probe_port(struct sparx5 *sparx5, >> + struct device_node *portnp, >> + struct phy *serdes, >> + u32 portno, >> + struct sparx5_port_config *conf) >> +{ >> + phy_interface_t phy_mode = conf->phy_mode; >> + struct sparx5_port *spx5_port; >> + struct net_device *ndev; >> + struct phylink *phylink; >> + int err; >> + >> + err = sparx5_create_targets(sparx5); >> + if (err) >> + return err; >> + ndev = sparx5_create_netdev(sparx5, portno); >> + if (IS_ERR(ndev)) { >> + dev_err(sparx5->dev, "Could not create net device: %02u\n", portno); >> + return PTR_ERR(ndev); >> + } >> + spx5_port = netdev_priv(ndev); >> + spx5_port->of_node = portnp; >> + spx5_port->serdes = serdes; >> + spx5_port->pvid = NULL_VID; >> + spx5_port->signd_internal = true; >> + spx5_port->signd_active_high = true; >> + spx5_port->signd_enable = true; >> + spx5_port->flow_control = false; >> + spx5_port->max_vlan_tags = SPX5_PORT_MAX_TAGS_NONE; >> + spx5_port->vlan_type = SPX5_VLAN_PORT_TYPE_UNAWARE; >> + spx5_port->custom_etype = 0x8880; /* Vitesse */ >> + conf->portmode = conf->phy_mode; >> + spx5_port->conf.speed = SPEED_UNKNOWN; >> + spx5_port->conf.power_down = true; >> + sparx5->ports[portno] = spx5_port; > > >> +struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) >> +{ >> + struct net_device *ndev; >> + struct sparx5_port *spx5_port; >> + int err; >> + >> + ndev = devm_alloc_etherdev(sparx5->dev, sizeof(struct sparx5_port)); >> + if (!ndev) >> + return ERR_PTR(-ENOMEM); >> + > >... > >> + err = register_netdev(ndev); >> + if (err) { >> + dev_err(sparx5->dev, "netdev registration failed\n"); >> + return ERR_PTR(err); >> + } > >This is one of the classic bugs in network drivers. As soon as you >call register_netdev() the interface is live. The network stack can >start using it. But you have not finished initialzing spx5_port. So >bad things are going to happen. Oops. I will fix that. Thanks for the comments. Steen > > Andrew BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 28.11.2020 19:45, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> +/* Add a potentially wrapping 32 bit value to a 64 bit counter */ >> +static inline void sparx5_update_counter(u64 *cnt, u32 val) >> +{ >> + if (val < (*cnt & U32_MAX)) >> + *cnt += (u64)1 << 32; /* value has wrapped */ >> + >> + *cnt = (*cnt & ~(u64)U32_MAX) + val; >> +} > >I don't follow what this is doing. Could you give some examples? The statistics counters comes from different sources, and unfortunately have different layouts: Some are 32 bit and some a 40 bit, so this function is a wrapper to be able to handle them in the same way, polling the counters often enough to be able to catch overruns. > >> +static const char *const sparx5_stats_layout[] = { >> + "rx_in_bytes", >> + "rx_symbol_err", >> + "rx_pause", >> + "rx_unsup_opcode", > >> +static void sparx5_update_port_stats(struct sparx5 *sparx5, int portno) >> +{ >> + struct sparx5_port *spx5_port = sparx5->ports[portno]; >> + bool high_speed_dev = sparx5_is_high_speed_device(&spx5_port->conf); > >Reverse christmas tree. Which in this case, means you need to move the >assignment into the body of the code. OK. > >> +static void sparx5_get_sset_strings(struct net_device *ndev, u32 sset, u8 *data) >> +{ >> + struct sparx5_port *port = netdev_priv(ndev); >> + struct sparx5 *sparx5 = port->sparx5; >> + int idx; >> + >> + if (sset != ETH_SS_STATS) >> + return; >> + >> + for (idx = 0; idx < sparx5->num_stats; idx++) >> + memcpy(data + idx * ETH_GSTRING_LEN, >> + sparx5->stats_layout[idx], ETH_GSTRING_LEN); > >You cannot use memcpy here, because the strings you have defined are >not ETH_GSTRING_LEN long. We once had a driver which happened to have >its strings at the end of a page. The memcpy would copy the string, >but keep going passed the end of string, over the page boundary, and >trigger a segmentation fault. Yes, I see that. Thanks for the comments /Steen > > Andrew BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 28.11.2020 20:03, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> +static int sparx5_port_open(struct net_device *ndev) >> +{ >> + struct sparx5_port *port = netdev_priv(ndev); >> + int err = 0; >> + >> + sparx5_port_enable(port, true); >> + if (port->conf.phy_mode != PHY_INTERFACE_MODE_NA) { >> + err = phylink_of_phy_connect(port->phylink, port->of_node, 0); >> + if (err) { >> + netdev_err(ndev, "Could not attach to PHY\n"); >> + return err; >> + } >> + } > >This looks a bit odd. PHY_INTERFACE_MODE_NA means don't touch, >something else has already configured the MAC-PHY mode in the PHY. >You should not not connect the PHY because of this. Hmm. I will have to revisit this again. The intent was to be able to destinguish between regular PHYs and SFPs (as read from the DT). But maybe the phylink_of_phy_connect function handles this automatically... > >> +void sparx5_destroy_netdev(struct sparx5 *sparx5, struct sparx5_port *port) >> +{ >> + if (port->phylink) { >> + /* Disconnect the phy */ >> + if (rtnl_trylock()) { > >Why do you use rtnl_trylock()? The sparx5_port_stop() in turn calls phylink_stop() that expects the lock to be taken. Should I rather just call rtnl_lock()? Thanks for your comments /Steen > > Andrew BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 18:26, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> +static void sparx5_hw_lock(struct sparx5 *sparx5) >> +{ >> + mutex_lock(&sparx5->lock); >> +} >> + >> +static void sparx5_hw_unlock(struct sparx5 *sparx5) >> +{ >> + mutex_unlock(&sparx5->lock); >> +} > >Why is this mutex special and gets a wrapper where as the other two >don't? If there is no reason for the wrapper, please remove it. > > Andrew Hi Andrew, There is no need for any special treatment, so I will remove the wrapper. BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 18:16, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c >> new file mode 100644 >> index 000000000000..a91dd9532f1c >> --- /dev/null >> +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c >> @@ -0,0 +1,1027 @@ >> +// SPDX-License-Identifier: GPL-2.0+ >> +/* Microchip Sparx5 Switch driver >> + * >> + * Copyright (c) 2020 Microchip Technology Inc. and its subsidiaries. >> + */ >> + >> +#include <linux/ethtool.h> >> + >> +#include "sparx5_main.h" >> +#include "sparx5_port.h" >> + >> +/* Add a potentially wrapping 32 bit value to a 64 bit counter */ >> +static inline void sparx5_update_counter(u64 *cnt, u32 val) >> +{ >> + if (val < (*cnt & U32_MAX)) >> + *cnt += (u64)1 << 32; /* value has wrapped */ >> + >> + *cnt = (*cnt & ~(u64)U32_MAX) + val; >> +} > >No inline functions in C files. Let the compiler decide. > >And i now think i get what this is doing. But i'm surprised at the >hardware. Normally registers like this which are expected to wrap >around, reset to 0 on read. > > Andrew Hi Andrew, I will remove the inline. In our case the counters just wraps around (at either 32 or 40 bit). Thanks for your comments BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On Mon, Nov 30, 2020 at 02:09:34PM +0100, Steen Hegelund wrote: > On 27.11.2020 18:00, Andrew Lunn wrote: > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > > + reg-names: > > > + minItems: 153 > > > + items: > > > + - const: dev2g5_0 > > > + - const: dev5g_0 > > > + - const: pcs5g_br_0 > > > + - const: dev2g5_1 > > > + - const: dev5g_1 > > ... > > > + - const: ana_ac > > > + - const: vop > > > > > + switch: switch@600000000 { > > > + compatible = "microchip,sparx5-switch"; > > > + reg = <0x10004000 0x4000>, /* dev2g5_0 */ > > > + <0x10008000 0x4000>, /* dev5g_0 */ > > > + <0x1000c000 0x4000>, /* pcs5g_br_0 */ > > > + <0x10010000 0x4000>, /* dev2g5_1 */ > > > + <0x10014000 0x4000>, /* dev5g_1 */ > > > > ... > > > > > + <0x11800000 0x100000>, /* ana_l2 */ > > > + <0x11900000 0x100000>, /* ana_ac */ > > > + <0x11a00000 0x100000>; /* vop */ > > > > This is a pretty unusual binding. > > > > Why is it not > > > > reg = <0x10004000 0x1af8000> > > > > and the driver can then break up the memory into its sub ranges? > > > > Andrew > Hi Andrew, > > Since the targets used by the driver is not always in the natural > address order (e.g. the dev2g5_x targets), I thought it best to let the DT > take care of this since this cannot be probed. I am aware that this causes > extra mappings compared to the one-range strategy, but this layout seems more > transparent to me, also when mapped over PCIe. The question is, do you have a device tree usage for this? Are there devices in the family which have the regions in a different order? You can easily move this table into the driver, and let the driver break the region up. That would be normal. Andrew
On 28.11.2020 22:28, Russell King - ARM Linux admin wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: >> > +static void sparx5_phylink_mac_config(struct phylink_config *config, >> > + unsigned int mode, >> > + const struct phylink_link_state *state) >> > +{ >> > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); >> > + struct sparx5_port_config conf; >> > + int err = 0; >> > + >> > + conf = port->conf; >> > + conf.autoneg = state->an_enabled; >> > + conf.pause = state->pause; >> > + conf.duplex = state->duplex; >> > + conf.power_down = false; >> > + conf.portmode = state->interface; >> > + >> > + if (state->speed == SPEED_UNKNOWN) { >> > + /* When a SFP is plugged in we use capabilities to >> > + * default to the highest supported speed >> > + */ >> >> This looks suspicious. > >Yes, it looks highly suspicious. The fact that >sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() >does all the work suggests that this was developed before the phylink >re-organisation, and this code hasn't been updated for it. Hi Russell, The work started on 5.4 and I think I may have not really understood the finer details in the changes in 5.9. > >Any new code for the kernel really ought to be updated for the new >phylink methodology before it is accepted. > Could you point me to a good example of the new methodology? >Looking at sparx5_port_config(), it also seems to use >PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All >very well for the driver to do that internally, but it's confusing >when it comes to reviewing this stuff, especially when people outside >of the driver (such as myself) reviewing it need to understand what's >going on with the configuration. Hmmm. I better check if this is as intended. > >-- >RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ >FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last! Thanks for your comments. BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 10:52, Russell King - ARM Linux admin wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: >> On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: >> > > +static void sparx5_phylink_mac_config(struct phylink_config *config, >> > > + unsigned int mode, >> > > + const struct phylink_link_state *state) >> > > +{ >> > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); >> > > + struct sparx5_port_config conf; >> > > + int err = 0; >> > > + >> > > + conf = port->conf; >> > > + conf.autoneg = state->an_enabled; >> > > + conf.pause = state->pause; >> > > + conf.duplex = state->duplex; >> > > + conf.power_down = false; >> > > + conf.portmode = state->interface; >> > > + >> > > + if (state->speed == SPEED_UNKNOWN) { >> > > + /* When a SFP is plugged in we use capabilities to >> > > + * default to the highest supported speed >> > > + */ >> > >> > This looks suspicious. >> >> Yes, it looks highly suspicious. The fact that >> sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() >> does all the work suggests that this was developed before the phylink >> re-organisation, and this code hasn't been updated for it. >> >> Any new code for the kernel really ought to be updated for the new >> phylink methodology before it is accepted. >> >> Looking at sparx5_port_config(), it also seems to use >> PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All >> very well for the driver to do that internally, but it's confusing >> when it comes to reviewing this stuff, especially when people outside >> of the driver (such as myself) reviewing it need to understand what's >> going on with the configuration. > Hi Russell, >There are other issues too. > >Looking at sparx5_get_1000basex_status(), we have: > > + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | > + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); > >Why is the link status the logical OR of these? Oops: It should have been AND. Well spotted. > > + if ((lp_abil >> 8) & 1) /* symmetric pause */ > + status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX; > + if (lp_abil & (1 << 7)) /* asymmetric pause */ > + status->pause |= MLO_PAUSE_RX; > >is actually wrong, and I see I need to improve the documentation for >mac_pcs_get_state(). The intention in the documentation was concerning >hardware that indicated the _resolved_ status of pause modes. It was >not intended that drivers resolve the pause modes themselves. > >Even so, the above is still wrong; it takes no account of what is being >advertised at the local end. If one looks at the implementation in >phylink_decode_c37_word(), one will notice there is code to deal with >this. > >I think we ought to make phylink_decode_c37_word() and >phylink_decode_sgmii_word() public functions, and then this driver can >use these helpers to decode the link partner advertisement to the >phylink state. Should I remove the current implementation and use something like what is in phylink_decode_c37_word() and phylink_decode_sgmii_word() in the meantime? > >Does the driver need to provide an ethtool .get_link function? That >seems to bypass phylink. Why can't ethtool_op_get_link() be used? I think that I tried that earlier, but ran into problems. I better revisit this, and try out your suggestion. > >I think if ethtool_op_get_link() is used, we then have just one caller >for sparx5_get_port_status(), which means "struct sparx5_port_status" >can be eliminated and the code cleaned up to use the phylink decoding >helpers. > >-- >RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ >FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last! Thanks for your comments. BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 11:30, Russell King - ARM Linux admin wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote: >> There are other issues too. > >This is also wrong: > >+ if (port->ndev && port->ndev->phydev) >+ status->link = port->ndev->phydev->link; > >phylink already deals with that situation. So if I need the link state, what interface should I then use to get it? > >-- >RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ >FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last! Thanks for your comments BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 11:28, Russell King - ARM Linux admin wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote: >> On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: >> > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: >> > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, >> > > > + unsigned int mode, >> > > > + const struct phylink_link_state *state) >> > > > +{ >> > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); >> > > > + struct sparx5_port_config conf; >> > > > + int err = 0; >> > > > + >> > > > + conf = port->conf; >> > > > + conf.autoneg = state->an_enabled; >> > > > + conf.pause = state->pause; >> > > > + conf.duplex = state->duplex; >> > > > + conf.power_down = false; >> > > > + conf.portmode = state->interface; >> > > > + >> > > > + if (state->speed == SPEED_UNKNOWN) { >> > > > + /* When a SFP is plugged in we use capabilities to >> > > > + * default to the highest supported speed >> > > > + */ >> > > >> > > This looks suspicious. >> > >> > Yes, it looks highly suspicious. The fact that >> > sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() >> > does all the work suggests that this was developed before the phylink >> > re-organisation, and this code hasn't been updated for it. >> > >> > Any new code for the kernel really ought to be updated for the new >> > phylink methodology before it is accepted. >> > >> > Looking at sparx5_port_config(), it also seems to use >> > PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All >> > very well for the driver to do that internally, but it's confusing >> > when it comes to reviewing this stuff, especially when people outside >> > of the driver (such as myself) reviewing it need to understand what's >> > going on with the configuration. >> >> There are other issues too. >> >> Looking at sparx5_get_1000basex_status(), we have: >> >> + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | >> + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); >> >> Why is the link status the logical OR of these? >> >> + if ((lp_abil >> 8) & 1) /* symmetric pause */ >> + status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX; >> + if (lp_abil & (1 << 7)) /* asymmetric pause */ >> + status->pause |= MLO_PAUSE_RX; >> >> is actually wrong, and I see I need to improve the documentation for >> mac_pcs_get_state(). The intention in the documentation was concerning >> hardware that indicated the _resolved_ status of pause modes. It was >> not intended that drivers resolve the pause modes themselves. >> >> Even so, the above is still wrong; it takes no account of what is being >> advertised at the local end. If one looks at the implementation in >> phylink_decode_c37_word(), one will notice there is code to deal with >> this. >> >> I think we ought to make phylink_decode_c37_word() and >> phylink_decode_sgmii_word() public functions, and then this driver can >> use these helpers to decode the link partner advertisement to the >> phylink state. >> >> Does the driver need to provide an ethtool .get_link function? That >> seems to bypass phylink. Why can't ethtool_op_get_link() be used? >> >> I think if ethtool_op_get_link() is used, we then have just one caller >> for sparx5_get_port_status(), which means "struct sparx5_port_status" >> can be eliminated and the code cleaned up to use the phylink decoding >> helpers. > >(Sorry, I keep spotting bits in the code - it's really not an easy >chunk of code to review.) > >I'm also not sure that this is really correct: > >+ status->serdes_link = !phy_validate(port->serdes, PHY_MODE_ETHERNET, >+ port->conf.portmode, NULL); > >The documentation for phy_validate() says: > > * Used to check that the current set of parameters can be handled by > * the phy. Implementations are free to tune the parameters passed as > * arguments if needed by some implementation detail or > * constraints. It will not change any actual configuration of the > * PHY, so calling it as many times as deemed fit will have no side > * effect. > >and clearly, passing NULL for opts, gives the function no opportunity >to do what it's intended, so phy_validate() is being used for some >other purpose than that which the drivers/phy subsystem intends it to >be used for. Hi Russell, Yes this is a bit of an overload of the phy_validate(). The Serdes driver validates the portmode, and if OK, it returns the current state of the link (bool), so that the client (SwitchDev) can know if the link parameters so far results in a operational link. It does not change any configuration. I have not found another way to get the state of a generic PHY driver, but if there is one, I will certainly use that. Can you suggest the way to go? > >-- >RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ >FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last! BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On 29.11.2020 18:35, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >> +#define SPX5_RD_(sparx5, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + readl(spx5_addr((sparx5)->regs, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth)) >> + >> +#define SPX5_INST_RD_(iomem, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + readl(spx5_inst_addr(iomem, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth)) >> + >> +#define SPX5_WR_(val, sparx5, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + writel(val, spx5_addr((sparx5)->regs, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth)) >> + >> +#define SPX5_INST_WR_(val, iomem, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + writel(val, spx5_inst_addr(iomem, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth)) >> + >> +#define SPX5_RMW_(val, mask, sparx5, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + do { \ >> + u32 _v_; \ >> + u32 _m_ = mask; \ >> + void __iomem *addr = \ >> + spx5_addr((sparx5)->regs, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth); \ >> + _v_ = readl(addr); \ >> + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ >> + writel(_v_, addr); \ >> + } while (0) >> + >> +#define SPX5_INST_RMW_(val, mask, iomem, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + do { \ >> + u32 _v_; \ >> + u32 _m_ = mask; \ >> + void __iomem *addr = \ >> + spx5_inst_addr(iomem, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth); \ >> + _v_ = readl(addr); \ >> + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ >> + writel(_v_, addr); \ >> + } while (0) >> + >> +#define SPX5_REG_RD_(regaddr) \ >> + readl(regaddr) >> + >> +#define SPX5_REG_WR_(val, regaddr) \ >> + writel(val, regaddr) >> + >> +#define SPX5_REG_RMW_(val, mask, regaddr) \ >> + do { \ >> + u32 _v_; \ >> + u32 _m_ = mask; \ >> + void __iomem *_r_ = regaddr; \ >> + _v_ = readl(_r_); \ >> + _v_ = ((_v_ & ~(_m_)) | ((val) & (_m_))); \ >> + writel(_v_, _r_); \ >> + } while (0) >> + >> +#define SPX5_REG_GET_(sparx5, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) \ >> + spx5_addr((sparx5)->regs, id, tinst, tcnt, \ >> + gbase, ginst, gcnt, gwidth, \ >> + raddr, rinst, rcnt, rwidth) >> + >> +#define SPX5_RD(...) SPX5_RD_(__VA_ARGS__) >> +#define SPX5_WR(...) SPX5_WR_(__VA_ARGS__) >> +#define SPX5_RMW(...) SPX5_RMW_(__VA_ARGS__) >> +#define SPX5_INST_RD(...) SPX5_INST_RD_(__VA_ARGS__) >> +#define SPX5_INST_WR(...) SPX5_INST_WR_(__VA_ARGS__) >> +#define SPX5_INST_RMW(...) SPX5_INST_RMW_(__VA_ARGS__) >> +#define SPX5_INST_GET(sparx5, id, tinst) ((sparx5)->regs[(id) + (tinst)]) >> +#define SPX5_REG_RMW(...) SPX5_REG_RMW_(__VA_ARGS__) >> +#define SPX5_REG_WR(...) SPX5_REG_WR_(__VA_ARGS__) >> +#define SPX5_REG_RD(...) SPX5_REG_RD_(__VA_ARGS__) >> +#define SPX5_REG_GET(...) SPX5_REG_GET_(__VA_ARGS__) > >I don't see any reason for macro magic here. If this just left over >from HAL code? Please turn this all into functions. > > Andrew > Thanks for the comment. I will transform this into functions. BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
On Mon, Nov 30, 2020 at 03:30:23PM +0100, Steen Hegelund wrote: > On 29.11.2020 11:30, Russell King - ARM Linux admin wrote: > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote: > > > There are other issues too. > > > > This is also wrong: > > > > + if (port->ndev && port->ndev->phydev) > > + status->link = port->ndev->phydev->link; > > > > phylink already deals with that situation. > > So if I need the link state, what interface should I then use to get it? The network carrier state reflects the link state. As I've said, using the ethtool_op_get_link() is entirely suitable for the ethtool .get_link method - other network drivers use this with phylink and it works. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On Mon, Nov 30, 2020 at 03:15:56PM +0100, Steen Hegelund wrote: > On 29.11.2020 10:52, Russell King - ARM Linux admin wrote: > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: > > > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > > > > + unsigned int mode, > > > > > + const struct phylink_link_state *state) > > > > > +{ > > > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > > > > + struct sparx5_port_config conf; > > > > > + int err = 0; > > > > > + > > > > > + conf = port->conf; > > > > > + conf.autoneg = state->an_enabled; > > > > > + conf.pause = state->pause; > > > > > + conf.duplex = state->duplex; > > > > > + conf.power_down = false; > > > > > + conf.portmode = state->interface; > > > > > + > > > > > + if (state->speed == SPEED_UNKNOWN) { > > > > > + /* When a SFP is plugged in we use capabilities to > > > > > + * default to the highest supported speed > > > > > + */ > > > > > > > > This looks suspicious. > > > > > > Yes, it looks highly suspicious. The fact that > > > sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() > > > does all the work suggests that this was developed before the phylink > > > re-organisation, and this code hasn't been updated for it. > > > > > > Any new code for the kernel really ought to be updated for the new > > > phylink methodology before it is accepted. > > > > > > Looking at sparx5_port_config(), it also seems to use > > > PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All > > > very well for the driver to do that internally, but it's confusing > > > when it comes to reviewing this stuff, especially when people outside > > > of the driver (such as myself) reviewing it need to understand what's > > > going on with the configuration. > > > > Hi Russell, > > > There are other issues too. > > > > Looking at sparx5_get_1000basex_status(), we have: > > > > + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | > > + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); > > > > > Why is the link status the logical OR of these? > > Oops: It should have been AND. Well spotted. Do you need to check the sync status? Isn't it impossible to have "link up" on a link that is unsynchronised? -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On Mon, Nov 30, 2020 at 03:39:08PM +0100, Steen Hegelund wrote: > On 29.11.2020 11:28, Russell King - ARM Linux admin wrote: > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > On Sun, Nov 29, 2020 at 10:52:45AM +0000, Russell King - ARM Linux admin wrote: > > > On Sat, Nov 28, 2020 at 10:28:28PM +0000, Russell King - ARM Linux admin wrote: > > > > On Sat, Nov 28, 2020 at 08:06:16PM +0100, Andrew Lunn wrote: > > > > > > +static void sparx5_phylink_mac_config(struct phylink_config *config, > > > > > > + unsigned int mode, > > > > > > + const struct phylink_link_state *state) > > > > > > +{ > > > > > > + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); > > > > > > + struct sparx5_port_config conf; > > > > > > + int err = 0; > > > > > > + > > > > > > + conf = port->conf; > > > > > > + conf.autoneg = state->an_enabled; > > > > > > + conf.pause = state->pause; > > > > > > + conf.duplex = state->duplex; > > > > > > + conf.power_down = false; > > > > > > + conf.portmode = state->interface; > > > > > > + > > > > > > + if (state->speed == SPEED_UNKNOWN) { > > > > > > + /* When a SFP is plugged in we use capabilities to > > > > > > + * default to the highest supported speed > > > > > > + */ > > > > > > > > > > This looks suspicious. > > > > > > > > Yes, it looks highly suspicious. The fact that > > > > sparx5_phylink_mac_link_up() is empty, and sparx5_phylink_mac_config() > > > > does all the work suggests that this was developed before the phylink > > > > re-organisation, and this code hasn't been updated for it. > > > > > > > > Any new code for the kernel really ought to be updated for the new > > > > phylink methodology before it is accepted. > > > > > > > > Looking at sparx5_port_config(), it also seems to use > > > > PHY_INTERFACE_MODE_1000BASEX for both 1000BASE-X and 2500BASE-X. All > > > > very well for the driver to do that internally, but it's confusing > > > > when it comes to reviewing this stuff, especially when people outside > > > > of the driver (such as myself) reviewing it need to understand what's > > > > going on with the configuration. > > > > > > There are other issues too. > > > > > > Looking at sparx5_get_1000basex_status(), we have: > > > > > > + status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) | > > > + DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value); > > > > > > Why is the link status the logical OR of these? > > > > > > + if ((lp_abil >> 8) & 1) /* symmetric pause */ > > > + status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX; > > > + if (lp_abil & (1 << 7)) /* asymmetric pause */ > > > + status->pause |= MLO_PAUSE_RX; > > > > > > is actually wrong, and I see I need to improve the documentation for > > > mac_pcs_get_state(). The intention in the documentation was concerning > > > hardware that indicated the _resolved_ status of pause modes. It was > > > not intended that drivers resolve the pause modes themselves. > > > > > > Even so, the above is still wrong; it takes no account of what is being > > > advertised at the local end. If one looks at the implementation in > > > phylink_decode_c37_word(), one will notice there is code to deal with > > > this. > > > > > > I think we ought to make phylink_decode_c37_word() and > > > phylink_decode_sgmii_word() public functions, and then this driver can > > > use these helpers to decode the link partner advertisement to the > > > phylink state. > > > > > > Does the driver need to provide an ethtool .get_link function? That > > > seems to bypass phylink. Why can't ethtool_op_get_link() be used? > > > > > > I think if ethtool_op_get_link() is used, we then have just one caller > > > for sparx5_get_port_status(), which means "struct sparx5_port_status" > > > can be eliminated and the code cleaned up to use the phylink decoding > > > helpers. > > > > (Sorry, I keep spotting bits in the code - it's really not an easy > > chunk of code to review.) > > > > I'm also not sure that this is really correct: > > > > + status->serdes_link = !phy_validate(port->serdes, PHY_MODE_ETHERNET, > > + port->conf.portmode, NULL); > > > > The documentation for phy_validate() says: > > > > * Used to check that the current set of parameters can be handled by > > * the phy. Implementations are free to tune the parameters passed as > > * arguments if needed by some implementation detail or > > * constraints. It will not change any actual configuration of the > > * PHY, so calling it as many times as deemed fit will have no side > > * effect. > > > > and clearly, passing NULL for opts, gives the function no opportunity > > to do what it's intended, so phy_validate() is being used for some > > other purpose than that which the drivers/phy subsystem intends it to > > be used for. > > Hi Russell, > > Yes this is a bit of an overload of the phy_validate(). > > The Serdes driver validates the portmode, and if OK, it returns the > current state of the link (bool), so that the client (SwitchDev) can know if the > link parameters so far results in a operational link. It does not change > any configuration. This seems very strange. What is the point of asking for a portmode which could be different from the current mode, and returning the link state for the current mode? I don't think that there is an alternative interface - maybe it would be a better idea to propose a new interface to the drivers/phy maintainers, rather than bodging an existing interface for your needs? Thanks. -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!
On 30.11.2020 15:05, Andrew Lunn wrote: >EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > >On Mon, Nov 30, 2020 at 02:09:34PM +0100, Steen Hegelund wrote: >> On 27.11.2020 18:00, Andrew Lunn wrote: >> > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe >> > >> > > + reg-names: >> > > + minItems: 153 >> > > + items: >> > > + - const: dev2g5_0 >> > > + - const: dev5g_0 >> > > + - const: pcs5g_br_0 >> > > + - const: dev2g5_1 >> > > + - const: dev5g_1 >> > ... >> > > + - const: ana_ac >> > > + - const: vop >> > >> > > + switch: switch@600000000 { >> > > + compatible = "microchip,sparx5-switch"; >> > > + reg = <0x10004000 0x4000>, /* dev2g5_0 */ >> > > + <0x10008000 0x4000>, /* dev5g_0 */ >> > > + <0x1000c000 0x4000>, /* pcs5g_br_0 */ >> > > + <0x10010000 0x4000>, /* dev2g5_1 */ >> > > + <0x10014000 0x4000>, /* dev5g_1 */ >> > >> > ... >> > >> > > + <0x11800000 0x100000>, /* ana_l2 */ >> > > + <0x11900000 0x100000>, /* ana_ac */ >> > > + <0x11a00000 0x100000>; /* vop */ >> > >> > This is a pretty unusual binding. >> > >> > Why is it not >> > >> > reg = <0x10004000 0x1af8000> >> > >> > and the driver can then break up the memory into its sub ranges? >> > >> > Andrew >> Hi Andrew, >> >> Since the targets used by the driver is not always in the natural >> address order (e.g. the dev2g5_x targets), I thought it best to let the DT >> take care of this since this cannot be probed. I am aware that this causes >> extra mappings compared to the one-range strategy, but this layout seems more >> transparent to me, also when mapped over PCIe. > >The question is, do you have a device tree usage for this? Are there >devices in the family which have the regions in a different order? > Hi Andrew, Yes we do have more chips in the pipeline that we expect to be able to use this driver. But I see your point. The new chips most certainly will have a different number of targets (that will most likely map to different addresses). The bindings will need to be able to cope with the different number of targets, and I think that will be difficult. So all in all I think I will rework this, and make the mapping in the driver instead. >You can easily move this table into the driver, and let the driver >break the region up. That would be normal. > > Andrew BR Steen --------------------------------------- Steen Hegelund steen.hegelund@microchip.com
> Hmm. I will have to revisit this again. The intent was to be able to > destinguish between regular PHYs and SFPs (as read from the DT). > But maybe the phylink_of_phy_connect function handles this > automatically... Yes, you should not have to differentiate between an SFP and a traditional copper PHY. phylink will handle it all. > > > > > +void sparx5_destroy_netdev(struct sparx5 *sparx5, struct sparx5_port *port) > > > +{ > > > + if (port->phylink) { > > > + /* Disconnect the phy */ > > > + if (rtnl_trylock()) { > > > > Why do you use rtnl_trylock()? > > The sparx5_port_stop() in turn calls phylink_stop() that expects the lock > to be taken. Should I rather just call rtnl_lock()? Yes, you don't want to not call phylink_stop(). Andrew
Andrew Lunn writes: >> +static void sparx5_attr_stp_state_set(struct sparx5_port *port, >> + struct switchdev_trans *trans, >> + u8 state) >> +{ >> + struct sparx5 *sparx5 = port->sparx5; >> + >> + if (!test_bit(port->portno, sparx5->bridge_mask)) { >> + netdev_err(port->ndev, >> + "Controlling non-bridged port %d?\n", port->portno); >> + return; >> + } >> + >> + switch (state) { >> + case BR_STATE_FORWARDING: >> + set_bit(port->portno, sparx5->bridge_fwd_mask); >> + break; >> + default: >> + clear_bit(port->portno, sparx5->bridge_fwd_mask); >> + break; >> + } > > That is pretty odd. What about listening, learning, blocking? > This only handles simple forward/block. We'll add the learning state as well. >> +static int sparx5_port_bridge_join(struct sparx5_port *port, >> + struct net_device *bridge) >> +{ >> + struct sparx5 *sparx5 = port->sparx5; >> + >> + if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) >> + /* First bridged port */ >> + sparx5->hw_bridge_dev = bridge; >> + else >> + if (sparx5->hw_bridge_dev != bridge) >> + /* This is adding the port to a second bridge, this is >> + * unsupported >> + */ >> + return -ENODEV; >> + >> + set_bit(port->portno, sparx5->bridge_mask); >> + >> + /* Port enters in bridge mode therefor don't need to copy to CPU >> + * frames for multicast in case the bridge is not requesting them >> + */ >> + __dev_mc_unsync(port->ndev, sparx5_mc_unsync); >> + >> + return 0; >> +} > > This looks suspiciously empty? Don't you need to tell the hardware > which ports this port is bridges to? Normally you see some code which > walks all the ports and finds those in the same bridge, and sets a bit > which allows these ports to talk to each other. Is that code somewhere > else? > This is applied when the STP state is handled - sparx5_update_fwd(). This is pretty much as in the ocelot driver, which can a somewhat similar switch - and driver - architecture. > Andrew Thank you for your comments, ---Lars -- Lars Povlsen, Microchip
Mon, Nov 30, 2020 at 02:13:35PM CET, steen.hegelund@microchip.com wrote: >On 27.11.2020 18:15, Andrew Lunn wrote: >> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe >> >> This is a very large driver, which is going to make it slow to review. >Hi Andrew, > >Yes I am aware of that, but I think that what is available with this >series, makes for a nice package that can be tested by us, and used by >our customers. Could you perhaps cut it into multiple patches for easier review? Like the basics, host delivery, fwd offload, etc?