Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next...
author    David S. Miller <davem@davemloft.net>
          Sun, 29 Mar 2015 20:38:08 +0000 (13:38 -0700)
committer David S. Miller <davem@davemloft.net>
          Sun, 29 Mar 2015 20:38:08 +0000 (13:38 -0700)
Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-03-27

This series contains updates to i40e and i40evf.

Jesse adds new device IDs to handle the new 20G speed for KR2.

Mitch provides a fix for an issue that shows up as a panic or memory
corruption when the device is brought down while under heavy stress.
This is resolved by delaying the release of resources until we receive
acknowledgment from the PF driver that the rings have indeed been
stopped (a sketch of this pattern follows the cover letter).  He also
adds firmware version information to ethtool reporting to align with
ixgbevf behavior.

Akeem increases the polling loop limit, since we found that in certain
circumstances the firmware can take longer to become ready after a
reset (see the polling sketch after the cover letter).
====================
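
As an aside on the deferred-teardown pattern above, here is a minimal
sketch in C.  It is not the actual i40evf implementation; every name in
it (vf_ring_ctx, vf_request_ring_stop, vf_handle_pf_ack) is
hypothetical, and the real driver exchanges queue-disable messages over
its admin queue:

#include <linux/slab.h>
#include <linux/types.h>

struct vf_ring_ctx {
	bool teardown_pending;	/* stop requested, waiting for PF ack */
	void *ring_mem;		/* descriptor rings, skb arrays, ... */
};

/* ndo_stop() path: ask the PF to stop the rings, but do NOT free yet. */
static void vf_request_ring_stop(struct vf_ring_ctx *ctx)
{
	ctx->teardown_pending = true;
	/* ... send a "disable queues" request to the PF here ... */
}

/* PF-message completion path: now it is safe to release the resources. */
static void vf_handle_pf_ack(struct vf_ring_ctx *ctx)
{
	if (!ctx->teardown_pending)
		return;
	kfree(ctx->ring_mem);	/* rings confirmed stopped by the PF */
	ctx->ring_mem = NULL;
	ctx->teardown_pending = false;
}

The point is simply that the free happens in the acknowledgment path,
not in the stop path, so the PF can no longer DMA into memory the VF
has already released.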
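
Similarly, a minimal sketch of a firmware-ready polling loop with a
raised retry limit; the limit value, struct my_hw and fw_is_ready()
are illustrative assumptions rather than the actual i40e code:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

#define FW_READY_POLL_LIMIT	100	/* raised retry count (illustrative) */

struct my_hw;				/* opaque device handle (hypothetical) */
bool fw_is_ready(struct my_hw *hw);	/* device-specific check (hypothetical) */

/* Poll until the firmware reports ready after a reset, or give up. */
static int wait_for_fw_ready(struct my_hw *hw)
{
	int i;

	for (i = 0; i < FW_READY_POLL_LIMIT; i++) {
		if (fw_is_ready(hw))
			return 0;
		usleep_range(1000, 2000);	/* back off between polls */
	}
	return -EBUSY;			/* firmware never came ready */
}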

Signed-off-by: David S. Miller <davem@davemloft.net>
63 files changed:
Documentation/devicetree/bindings/net/apm-xgene-enet.txt
arch/arm64/boot/dts/apm/apm-storm.dtsi
drivers/net/bonding/bond_main.c
drivers/net/dsa/mv88e6123_61_65.c
drivers/net/dsa/mv88e6171.c
drivers/net/dsa/mv88e6352.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/dsa/mv88e6xxx.h
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/team/team.c
drivers/net/virtio_net.c
include/linux/bpf.h
include/linux/if_vlan.h
include/linux/netdevice.h
include/linux/rhashtable.h
include/net/dsa.h
include/net/netfilter/nf_tables.h
include/net/tcp.h
include/uapi/linux/bpf.h
kernel/bpf/verifier.c
lib/rhashtable.c
net/8021q/vlan_dev.c
net/core/dev.c
net/core/filter.c
net/dsa/slave.c
net/ipv4/netfilter/nf_log_arp.c
net/ipv4/netfilter/nf_log_ipv4.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv6/fib6_rules.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/nf_log_ipv6.c
net/netfilter/Kconfig
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nft_hash.c
net/netfilter/nft_log.c
net/netfilter/nft_lookup.c
net/netfilter/nft_meta.c
net/netfilter/nft_rbtree.c
net/netlink/af_netlink.c
net/tipc/bcast.c
net/tipc/bcast.h
net/tipc/discover.c
net/tipc/link.c
net/tipc/name_distr.c
net/tipc/node.c
net/tipc/node.h

diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
index dc7961b330765070323daf07dd0e3127b42de46a..f55aa280d34f9fae6e25170f41a06e5c13104f06 100644
@@ -14,7 +14,10 @@ Required properties for all the ethernet interfaces:
   - "enet_csr": Ethernet control and status register address space
   - "ring_csr": Descriptor ring control and status register address space
   - "ring_cmd": Descriptor ring command register address space
-- interrupts: Ethernet main interrupt
+- interrupts: Two interrupt specifiers can be specified.
+  - First is the Rx interrupt.  This irq is mandatory.
+  - Second is the Tx completion interrupt.
+    This is supported only on SGMII based 1GbE and 10GbE interfaces.
 - port-id: Port number (0 or 1)
 - clocks: Reference to the clock entry.
 - local-mac-address: MAC address assigned to this device
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index c1eb6911e5397b676c337b15d84b707a64b967d1..e74f6e0a208ccbf584d2b2d61c98d3d0f1d169e2 100644
                              <0x0 0x1f200000 0x0 0Xc300>,
                              <0x0 0x1B000000 0x0 0X200>;
                        reg-names = "enet_csr", "ring_csr", "ring_cmd";
-                       interrupts = <0x0 0xA0 0x4>;
+                       interrupts = <0x0 0xA0 0x4>,
+                                    <0x0 0xA1 0x4>;
                        dma-coherent;
                        clocks = <&sge0clk 0>;
                        local-mac-address = [00 00 00 00 00 00];
                              <0x0 0x1f200000 0x0 0Xc300>,
                              <0x0 0x1B000000 0x0 0X8000>;
                        reg-names = "enet_csr", "ring_csr", "ring_cmd";
-                       interrupts = <0x0 0xAC 0x4>;
+                       interrupts = <0x0 0xAC 0x4>,
+                                    <0x0 0xAD 0x4>;
                        port-id = <1>;
                        dma-coherent;
                        clocks = <&sge1clk 0>;
                              <0x0 0x1f600000 0x0 0Xc300>,
                              <0x0 0x18000000 0x0 0X200>;
                        reg-names = "enet_csr", "ring_csr", "ring_cmd";
-                       interrupts = <0x0 0x60 0x4>;
+                       interrupts = <0x0 0x60 0x4>,
+                                    <0x0 0x61 0x4>;
                        dma-coherent;
                        clocks = <&xge0clk 0>;
                        /* mac address will be overwritten by the bootloader */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c026ce9cd7b6f52f1a6bff88b9e6053b13ecebcd..7b4684ccdb3fae520e568638e6a3fb240a388d12 100644
@@ -4038,6 +4038,7 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_fix_features       = bond_fix_features,
        .ndo_bridge_setlink     = ndo_dflt_netdev_switch_port_bridge_setlink,
        .ndo_bridge_dellink     = ndo_dflt_netdev_switch_port_bridge_dellink,
+       .ndo_features_check     = passthru_features_check,
 };
 
 static const struct device_type bond_type = {
diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index e9c736e1cef3a20cd599f6347be20de73127a0ae..2d7e1ffe9fdc49664ca4d2686b3339c447d00589 100644
@@ -222,28 +222,6 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
                val |= 0x000c;
        REG_WRITE(addr, 0x04, val);
 
-       /* Port Control 1: disable trunking.  Also, if this is the
-        * CPU port, enable learn messages to be sent to this port.
-        */
-       REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
-       /* Port based VLAN map: give each port its own address
-        * database, allow the CPU port to talk to each of the 'real'
-        * ports, and allow each of the 'real' ports to only talk to
-        * the upstream port.
-        */
-       val = (p & 0xf) << 12;
-       if (dsa_is_cpu_port(ds, p))
-               val |= ds->phys_port_mask;
-       else
-               val |= 1 << dsa_upstream_port(ds);
-       REG_WRITE(addr, 0x06, val);
-
-       /* Default VLAN ID and priority: don't set a default VLAN
-        * ID, and set the default packet priority to zero.
-        */
-       REG_WRITE(addr, 0x07, 0x0000);
-
        /* Port Control 2: don't force a good FCS, set the maximum
         * frame size to 10240 bytes, don't let the switch add or
         * strip 802.1q tags, don't discard tagged or untagged frames
@@ -288,18 +266,17 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
         */
        REG_WRITE(addr, 0x19, 0x7654);
 
-       return 0;
+       return mv88e6xxx_setup_port_common(ds, p);
 }
 
 static int mv88e6123_61_65_setup(struct dsa_switch *ds)
 {
-       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
        int i;
        int ret;
 
-       mutex_init(&ps->smi_mutex);
-       mutex_init(&ps->stats_mutex);
-       mutex_init(&ps->phy_mutex);
+       ret = mv88e6xxx_setup_common(ds);
+       if (ret < 0)
+               return ret;
 
        ret = mv88e6123_61_65_switch_reset(ds);
        if (ret < 0)
diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 9808c860a797f713bbe0bc9b5d3a92e598848922..18cfead83dc94851d4bedc0cfca6f8f0eaf43797 100644
 #include <net/dsa.h>
 #include "mv88e6xxx.h"
 
+/* Switch product IDs */
+#define ID_6171        0x1710
+#define ID_6172        0x1720
+
 static char *mv88e6171_probe(struct device *host_dev, int sw_addr)
 {
        struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev);
@@ -27,9 +31,9 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr)
 
        ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
        if (ret >= 0) {
-               if ((ret & 0xfff0) == 0x1710)
+               if ((ret & 0xfff0) == ID_6171)
                        return "Marvell 88E6171";
-               if ((ret & 0xfff0) == 0x1720)
+               if ((ret & 0xfff0) == ID_6172)
                        return "Marvell 88E6172";
        }
 
@@ -221,28 +225,6 @@ static int mv88e6171_setup_port(struct dsa_switch *ds, int p)
                val |= 0x000c;
        REG_WRITE(addr, 0x04, val);
 
-       /* Port Control 1: disable trunking.  Also, if this is the
-        * CPU port, enable learn messages to be sent to this port.
-        */
-       REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
-       /* Port based VLAN map: give each port its own address
-        * database, allow the CPU port to talk to each of the 'real'
-        * ports, and allow each of the 'real' ports to only talk to
-        * the upstream port.
-        */
-       val = (p & 0xf) << 12;
-       if (dsa_is_cpu_port(ds, p))
-               val |= ds->phys_port_mask;
-       else
-               val |= 1 << dsa_upstream_port(ds);
-       REG_WRITE(addr, 0x06, val);
-
-       /* Default VLAN ID and priority: don't set a default VLAN
-        * ID, and set the default packet priority to zero.
-        */
-       REG_WRITE(addr, 0x07, 0x0000);
-
        /* Port Control 2: don't force a good FCS, set the maximum
         * frame size to 10240 bytes, don't let the switch add or
         * strip 802.1q tags, don't discard tagged or untagged frames
@@ -287,17 +269,17 @@ static int mv88e6171_setup_port(struct dsa_switch *ds, int p)
         */
        REG_WRITE(addr, 0x19, 0x7654);
 
-       return 0;
+       return mv88e6xxx_setup_port_common(ds, p);
 }
 
 static int mv88e6171_setup(struct dsa_switch *ds)
 {
-       struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
        int i;
        int ret;
 
-       mutex_init(&ps->smi_mutex);
-       mutex_init(&ps->stats_mutex);
+       ret = mv88e6xxx_setup_common(ds);
+       if (ret < 0)
+               return ret;
 
        ret = mv88e6171_switch_reset(ds);
        if (ret < 0)
@@ -318,8 +300,6 @@ static int mv88e6171_setup(struct dsa_switch *ds)
                        return ret;
        }
 
-       mutex_init(&ps->phy_mutex);
-
        return 0;
 }
 
@@ -410,6 +390,28 @@ static int mv88e6171_get_sset_count(struct dsa_switch *ds)
        return ARRAY_SIZE(mv88e6171_hw_stats);
 }
 
+static int mv88e6171_get_eee(struct dsa_switch *ds, int port,
+                            struct ethtool_eee *e)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+       if (ps->id == ID_6172)
+               return mv88e6xxx_get_eee(ds, port, e);
+
+       return -EOPNOTSUPP;
+}
+
+static int mv88e6171_set_eee(struct dsa_switch *ds, int port,
+                            struct phy_device *phydev, struct ethtool_eee *e)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+       if (ps->id == ID_6172)
+               return mv88e6xxx_set_eee(ds, port, phydev, e);
+
+       return -EOPNOTSUPP;
+}
+
 struct dsa_switch_driver mv88e6171_switch_driver = {
        .tag_protocol           = DSA_TAG_PROTO_EDSA,
        .priv_size              = sizeof(struct mv88e6xxx_priv_state),
@@ -422,11 +424,19 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
        .get_strings            = mv88e6171_get_strings,
        .get_ethtool_stats      = mv88e6171_get_ethtool_stats,
        .get_sset_count         = mv88e6171_get_sset_count,
+       .set_eee                = mv88e6171_set_eee,
+       .get_eee                = mv88e6171_get_eee,
 #ifdef CONFIG_NET_DSA_HWMON
        .get_temp               = mv88e6xxx_get_temp,
 #endif
        .get_regs_len           = mv88e6xxx_get_regs_len,
        .get_regs               = mv88e6xxx_get_regs,
+       .port_join_bridge       = mv88e6xxx_join_bridge,
+       .port_leave_bridge      = mv88e6xxx_leave_bridge,
+       .port_stp_update        = mv88e6xxx_port_stp_update,
+       .fdb_add                = mv88e6xxx_port_fdb_add,
+       .fdb_del                = mv88e6xxx_port_fdb_del,
+       .fdb_getnext            = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 7bc5998384c68fac1dbf2654c97f275e419469fd..41fe3a6a72d1fa213239f47c68cec5bf34bb92cb 100644
@@ -215,28 +215,6 @@ static int mv88e6352_setup_port(struct dsa_switch *ds, int p)
                val |= 0x000c;
        REG_WRITE(addr, 0x04, val);
 
-       /* Port Control 1: disable trunking.  Also, if this is the
-        * CPU port, enable learn messages to be sent to this port.
-        */
-       REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
-       /* Port based VLAN map: give each port its own address
-        * database, allow the CPU port to talk to each of the 'real'
-        * ports, and allow each of the 'real' ports to only talk to
-        * the upstream port.
-        */
-       val = (p & 0xf) << 12;
-       if (dsa_is_cpu_port(ds, p))
-               val |= ds->phys_port_mask;
-       else
-               val |= 1 << dsa_upstream_port(ds);
-       REG_WRITE(addr, 0x06, val);
-
-       /* Default VLAN ID and priority: don't set a default VLAN
-        * ID, and set the default packet priority to zero.
-        */
-       REG_WRITE(addr, 0x07, 0x0000);
-
        /* Port Control 2: don't force a good FCS, set the maximum
         * frame size to 10240 bytes, don't let the switch add or
         * strip 802.1q tags, don't discard tagged or untagged frames
@@ -281,7 +259,7 @@ static int mv88e6352_setup_port(struct dsa_switch *ds, int p)
         */
        REG_WRITE(addr, 0x19, 0x7654);
 
-       return 0;
+       return mv88e6xxx_setup_port_common(ds, p);
 }
 
 #ifdef CONFIG_NET_DSA_HWMON
@@ -385,12 +363,11 @@ static int mv88e6352_setup(struct dsa_switch *ds)
        int ret;
        int i;
 
-       mutex_init(&ps->smi_mutex);
-       mutex_init(&ps->stats_mutex);
-       mutex_init(&ps->phy_mutex);
-       mutex_init(&ps->eeprom_mutex);
+       ret = mv88e6xxx_setup_common(ds);
+       if (ret < 0)
+               return ret;
 
-       ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+       mutex_init(&ps->eeprom_mutex);
 
        ret = mv88e6352_switch_reset(ds);
        if (ret < 0)
@@ -729,6 +706,12 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
        .set_eeprom             = mv88e6352_set_eeprom,
        .get_regs_len           = mv88e6xxx_get_regs_len,
        .get_regs               = mv88e6xxx_get_regs,
+       .port_join_bridge       = mv88e6xxx_join_bridge,
+       .port_leave_bridge      = mv88e6xxx_leave_bridge,
+       .port_stp_update        = mv88e6xxx_port_stp_update,
+       .fdb_add                = mv88e6xxx_port_fdb_add,
+       .fdb_del                = mv88e6xxx_port_fdb_del,
+       .fdb_getnext            = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6352");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index c18ffc98aaccf126874590630484912cc5dd47ae..13572cc24c6dc42a308e4bc0b660f3c0f673e48d 100644
@@ -9,6 +9,8 @@
  */
 
 #include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
 #include <linux/jiffies.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -72,19 +74,16 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
        return ret & 0xffff;
 }
 
-int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
+/* Must be called with SMI mutex held */
+static int _mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
        struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev);
        int ret;
 
        if (bus == NULL)
                return -EINVAL;
 
-       mutex_lock(&ps->smi_mutex);
        ret = __mv88e6xxx_reg_read(bus, ds->pd->sw_addr, addr, reg);
-       mutex_unlock(&ps->smi_mutex);
-
        if (ret < 0)
                return ret;
 
@@ -94,6 +93,18 @@ int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
        return ret;
 }
 
+int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret;
+
+       mutex_lock(&ps->smi_mutex);
+       ret = _mv88e6xxx_reg_read(ds, addr, reg);
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
 int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
                          int reg, u16 val)
 {
@@ -125,11 +136,11 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
        return 0;
 }
 
-int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+/* Must be called with SMI mutex held */
+static int _mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg,
+                               u16 val)
 {
-       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
        struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev);
-       int ret;
 
        if (bus == NULL)
                return -EINVAL;
@@ -137,8 +148,16 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
        dev_dbg(ds->master_dev, "-> addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n",
                addr, reg, val);
 
+       return __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val);
+}
+
+int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret;
+
        mutex_lock(&ps->smi_mutex);
-       ret = __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val);
+       ret = _mv88e6xxx_reg_write(ds, addr, reg, val);
        mutex_unlock(&ps->smi_mutex);
 
        return ret;
@@ -627,6 +646,31 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds)
        return mv88e6xxx_wait(ds, REG_GLOBAL2, 0x14, 0x8000);
 }
 
+/* Must be called with SMI lock held */
+static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask)
+{
+       unsigned long timeout = jiffies + HZ / 10;
+
+       while (time_before(jiffies, timeout)) {
+               int ret;
+
+               ret = _mv88e6xxx_reg_read(ds, reg, offset);
+               if (ret < 0)
+                       return ret;
+               if (!(ret & mask))
+                       return 0;
+
+               usleep_range(1000, 2000);
+       }
+       return -ETIMEDOUT;
+}
+
+/* Must be called with SMI lock held */
+static int _mv88e6xxx_atu_wait(struct dsa_switch *ds)
+{
+       return _mv88e6xxx_wait(ds, REG_GLOBAL, 0x0b, ATU_BUSY);
+}
+
 int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum)
 {
        int ret;
@@ -700,6 +744,423 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
        return 0;
 }
 
+static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int fid, u16 cmd)
+{
+       int ret;
+
+       ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid);
+       if (ret < 0)
+               return ret;
+
+       ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0b, cmd);
+       if (ret < 0)
+               return ret;
+
+       return _mv88e6xxx_atu_wait(ds);
+}
+
+static int _mv88e6xxx_flush_fid(struct dsa_switch *ds, int fid)
+{
+       int ret;
+
+       ret = _mv88e6xxx_atu_wait(ds);
+       if (ret < 0)
+               return ret;
+
+       return _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_FLUSH_NONSTATIC_FID);
+}
+
+static int mv88e6xxx_set_port_state(struct dsa_switch *ds, int port, u8 state)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int reg, ret = 0;
+       u8 oldstate;
+
+       mutex_lock(&ps->smi_mutex);
+
+       reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), 0x04);
+       if (reg < 0) {
+               ret = reg;
+               goto abort;
+       }
+
+       oldstate = reg & PSTATE_MASK;
+       if (oldstate != state) {
+               /* Flush forwarding database if we're moving a port
+                * from Learning or Forwarding state to Disabled or
+                * Blocking or Listening state.
+                */
+               if (oldstate >= PSTATE_LEARNING && state <= PSTATE_BLOCKING) {
+                       ret = _mv88e6xxx_flush_fid(ds, ps->fid[port]);
+                       if (ret)
+                               goto abort;
+               }
+               reg = (reg & ~PSTATE_MASK) | state;
+               ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x04, reg);
+       }
+
+abort:
+       mutex_unlock(&ps->smi_mutex);
+       return ret;
+}
+
+/* Must be called with smi lock held */
+static int _mv88e6xxx_update_port_config(struct dsa_switch *ds, int port)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       u8 fid = ps->fid[port];
+       u16 reg = fid << 12;
+
+       if (dsa_is_cpu_port(ds, port))
+               reg |= ds->phys_port_mask;
+       else
+               reg |= (ps->bridge_mask[fid] |
+                      (1 << dsa_upstream_port(ds))) & ~(1 << port);
+
+       return _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x06, reg);
+}
+
+/* Must be called with smi lock held */
+static int _mv88e6xxx_update_bridge_config(struct dsa_switch *ds, int fid)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int port;
+       u32 mask;
+       int ret;
+
+       mask = ds->phys_port_mask;
+       while (mask) {
+               port = __ffs(mask);
+               mask &= ~(1 << port);
+               if (ps->fid[port] != fid)
+                       continue;
+
+               ret = _mv88e6xxx_update_port_config(ds, port);
+               if (ret)
+                       return ret;
+       }
+
+       return _mv88e6xxx_flush_fid(ds, fid);
+}
+
+/* Bridge handling functions */
+
+int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret = 0;
+       u32 nmask;
+       int fid;
+
+       /* If the bridge group is not empty, join that group.
+        * Otherwise create a new group.
+        */
+       fid = ps->fid[port];
+       nmask = br_port_mask & ~(1 << port);
+       if (nmask)
+               fid = ps->fid[__ffs(nmask)];
+
+       nmask = ps->bridge_mask[fid] | (1 << port);
+       if (nmask != br_port_mask) {
+               netdev_err(ds->ports[port],
+                          "join: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n",
+                          fid, br_port_mask, nmask);
+               return -EINVAL;
+       }
+
+       mutex_lock(&ps->smi_mutex);
+
+       ps->bridge_mask[fid] = br_port_mask;
+
+       if (fid != ps->fid[port]) {
+               ps->fid_mask |= 1 << ps->fid[port];
+               ps->fid[port] = fid;
+               ret = _mv88e6xxx_update_bridge_config(ds, fid);
+       }
+
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
+int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       u8 fid, newfid;
+       int ret;
+
+       fid = ps->fid[port];
+
+       if (ps->bridge_mask[fid] != br_port_mask) {
+               netdev_err(ds->ports[port],
+                          "leave: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n",
+                          fid, br_port_mask, ps->bridge_mask[fid]);
+               return -EINVAL;
+       }
+
+       /* If the port was the last port of a bridge, we are done.
+        * Otherwise assign a new fid to the port, and fix up
+        * the bridge configuration.
+        */
+       if (br_port_mask == (1 << port))
+               return 0;
+
+       mutex_lock(&ps->smi_mutex);
+
+       newfid = __ffs(ps->fid_mask);
+       ps->fid[port] = newfid;
+       ps->fid_mask &= ~(1 << newfid);
+       ps->bridge_mask[fid] &= ~(1 << port);
+       ps->bridge_mask[newfid] = 1 << port;
+
+       ret = _mv88e6xxx_update_bridge_config(ds, fid);
+       if (!ret)
+               ret = _mv88e6xxx_update_bridge_config(ds, newfid);
+
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
+int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int stp_state;
+
+       switch (state) {
+       case BR_STATE_DISABLED:
+               stp_state = PSTATE_DISABLED;
+               break;
+       case BR_STATE_BLOCKING:
+       case BR_STATE_LISTENING:
+               stp_state = PSTATE_BLOCKING;
+               break;
+       case BR_STATE_LEARNING:
+               stp_state = PSTATE_LEARNING;
+               break;
+       case BR_STATE_FORWARDING:
+       default:
+               stp_state = PSTATE_FORWARDING;
+               break;
+       }
+
+       netdev_dbg(ds->ports[port], "port state %d [%d]\n", state, stp_state);
+
+       /* mv88e6xxx_port_stp_update may be called with softirqs disabled,
+        * so we can not update the port state directly but need to schedule it.
+        */
+       ps->port_state[port] = stp_state;
+       set_bit(port, &ps->port_state_update_mask);
+       schedule_work(&ps->bridge_work);
+
+       return 0;
+}
+
+static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
+                                 const unsigned char *addr)
+{
+       int i, ret;
+
+       for (i = 0; i < 3; i++) {
+               ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0d + i,
+                                       (addr[i * 2] << 8) | addr[i * 2 + 1]);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr)
+{
+       int i, ret;
+
+       for (i = 0; i < 3; i++) {
+               ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0d + i);
+               if (ret < 0)
+                       return ret;
+               addr[i * 2] = ret >> 8;
+               addr[i * 2 + 1] = ret & 0xff;
+       }
+
+       return 0;
+}
+
+static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port,
+                                   const unsigned char *addr, int state)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       u8 fid = ps->fid[port];
+       int ret;
+
+       ret = _mv88e6xxx_atu_wait(ds);
+       if (ret < 0)
+               return ret;
+
+       ret = __mv88e6xxx_write_addr(ds, addr);
+       if (ret < 0)
+               return ret;
+
+       ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0c,
+                                  (0x10 << port) | state);
+       if (ret)
+               return ret;
+
+       ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_LOAD_FID);
+
+       return ret;
+}
+
+int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid)
+{
+       int state = is_multicast_ether_addr(addr) ?
+                                       FDB_STATE_MC_STATIC : FDB_STATE_STATIC;
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret;
+
+       mutex_lock(&ps->smi_mutex);
+       ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, state);
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
+int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret;
+
+       mutex_lock(&ps->smi_mutex);
+       ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, FDB_STATE_UNUSED);
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
+static int __mv88e6xxx_port_getnext(struct dsa_switch *ds, int port,
+                                   unsigned char *addr, bool *is_static)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       u8 fid = ps->fid[port];
+       int ret, state;
+
+       ret = _mv88e6xxx_atu_wait(ds);
+       if (ret < 0)
+               return ret;
+
+       ret = __mv88e6xxx_write_addr(ds, addr);
+       if (ret < 0)
+               return ret;
+
+       do {
+               ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_GETNEXT_FID);
+               if (ret < 0)
+                       return ret;
+
+               ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0c);
+               if (ret < 0)
+                       return ret;
+               state = ret & FDB_STATE_MASK;
+               if (state == FDB_STATE_UNUSED)
+                       return -ENOENT;
+       } while (!(((ret >> 4) & 0xff) & (1 << port)));
+
+       ret = __mv88e6xxx_read_addr(ds, addr);
+       if (ret < 0)
+               return ret;
+
+       *is_static = state == (is_multicast_ether_addr(addr) ?
+                              FDB_STATE_MC_STATIC : FDB_STATE_STATIC);
+
+       return 0;
+}
+
+/* get next entry for port */
+int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
+                              unsigned char *addr, bool *is_static)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret;
+
+       mutex_lock(&ps->smi_mutex);
+       ret = __mv88e6xxx_port_getnext(ds, port, addr, is_static);
+       mutex_unlock(&ps->smi_mutex);
+
+       return ret;
+}
+
+static void mv88e6xxx_bridge_work(struct work_struct *work)
+{
+       struct mv88e6xxx_priv_state *ps;
+       struct dsa_switch *ds;
+       int port;
+
+       ps = container_of(work, struct mv88e6xxx_priv_state, bridge_work);
+       ds = ((struct dsa_switch *)ps) - 1;
+
+       while (ps->port_state_update_mask) {
+               port = __ffs(ps->port_state_update_mask);
+               clear_bit(port, &ps->port_state_update_mask);
+               mv88e6xxx_set_port_state(ds, port, ps->port_state[port]);
+       }
+}
+
+int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+       int ret, fid;
+
+       mutex_lock(&ps->smi_mutex);
+
+       /* Port Control 1: disable trunking, disable sending
+        * learning messages to this port.
+        */
+       ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x05, 0x0000);
+       if (ret)
+               goto abort;
+
+       /* Port based VLAN map: give each port its own address
+        * database, allow the CPU port to talk to each of the 'real'
+        * ports, and allow each of the 'real' ports to only talk to
+        * the upstream port.
+        */
+       fid = __ffs(ps->fid_mask);
+       ps->fid[port] = fid;
+       ps->fid_mask &= ~(1 << fid);
+
+       if (!dsa_is_cpu_port(ds, port))
+               ps->bridge_mask[fid] = 1 << port;
+
+       ret = _mv88e6xxx_update_port_config(ds, port);
+       if (ret)
+               goto abort;
+
+       /* Default VLAN ID and priority: don't set a default VLAN
+        * ID, and set the default packet priority to zero.
+        */
+       ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x0000);
+abort:
+       mutex_unlock(&ps->smi_mutex);
+       return ret;
+}
+
+int mv88e6xxx_setup_common(struct dsa_switch *ds)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+       mutex_init(&ps->smi_mutex);
+       mutex_init(&ps->stats_mutex);
+       mutex_init(&ps->phy_mutex);
+
+       ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+
+       ps->fid_mask = (1 << DSA_MAX_PORTS) - 1;
+
+       INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work);
+
+       return 0;
+}
+
 static int __init mv88e6xxx_init(void)
 {
 #if IS_ENABLED(CONFIG_NET_DSA_MV88E6131)
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 5fd42ced90117c741c192bed8b03d71f060c9943..aaf239aba7260ddfc562e84b60c3883d9031733d 100644
 #define REG_GLOBAL             0x1b
 #define REG_GLOBAL2            0x1c
 
+/* ATU commands */
+
+#define ATU_BUSY                       0x8000
+
+#define ATU_CMD_LOAD_FID               (ATU_BUSY | 0x3000)
+#define ATU_CMD_GETNEXT_FID            (ATU_BUSY | 0x4000)
+#define ATU_CMD_FLUSH_NONSTATIC_FID    (ATU_BUSY | 0x6000)
+
+/* port states */
+
+#define PSTATE_MASK            0x03
+#define PSTATE_DISABLED                0x00
+#define PSTATE_BLOCKING                0x01
+#define PSTATE_LEARNING                0x02
+#define PSTATE_FORWARDING      0x03
+
+/* FDB states */
+
+#define FDB_STATE_MASK                 0x0f
+
+#define FDB_STATE_UNUSED               0x00
+#define FDB_STATE_MC_STATIC            0x07    /* static multicast */
+#define FDB_STATE_STATIC               0x0e    /* static unicast */
+
 struct mv88e6xxx_priv_state {
        /* When using multi-chip addressing, this mutex protects
         * access to the indirect access registers.  (In single-chip
@@ -49,6 +73,17 @@ struct mv88e6xxx_priv_state {
        struct mutex eeprom_mutex;
 
        int             id; /* switch product id */
+
+       /* hw bridging */
+
+       u32 fid_mask;
+       u8 fid[DSA_MAX_PORTS];
+       u16 bridge_mask[DSA_MAX_PORTS];
+
+       unsigned long port_state_update_mask;
+       u8 port_state[DSA_MAX_PORTS];
+
+       struct work_struct bridge_work;
 };
 
 struct mv88e6xxx_hw_stat {
@@ -57,6 +92,8 @@ struct mv88e6xxx_hw_stat {
        int reg;
 };
 
+int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port);
+int mv88e6xxx_setup_common(struct dsa_switch *ds);
 int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
 int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
 int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
@@ -91,6 +128,15 @@ int mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, int regnum,
 int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
 int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
                      struct phy_device *phydev, struct ethtool_eee *e);
+int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask);
+int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask);
+int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state);
+int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid);
+int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid);
+int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
+                              unsigned char *addr, bool *is_static);
 
 extern struct dsa_switch_driver mv88e6131_switch_driver;
 extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 6146a993a13629f7b498c3564701ddaf53261301..40d3530d7f30966af178eb5890b2172cd3197b49 100644
@@ -428,13 +428,23 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 {
        struct xgene_enet_pdata *pdata = netdev_priv(ndev);
        struct device *dev = ndev_to_dev(ndev);
+       struct xgene_enet_desc_ring *ring;
        int ret;
 
-       ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq,
-                              IRQF_SHARED, ndev->name, pdata->rx_ring);
-       if (ret) {
-               netdev_err(ndev, "rx%d interrupt request failed\n",
-                          pdata->rx_ring->irq);
+       ring = pdata->rx_ring;
+       ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
+                              IRQF_SHARED, ring->irq_name, ring);
+       if (ret)
+               netdev_err(ndev, "Failed to request irq %s\n", ring->irq_name);
+
+       if (pdata->cq_cnt) {
+               ring = pdata->tx_ring->cp_ring;
+               ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
+                                      IRQF_SHARED, ring->irq_name, ring);
+               if (ret) {
+                       netdev_err(ndev, "Failed to request irq %s\n",
+                                  ring->irq_name);
+               }
        }
 
        return ret;
@@ -448,6 +458,37 @@ static void xgene_enet_free_irq(struct net_device *ndev)
        pdata = netdev_priv(ndev);
        dev = ndev_to_dev(ndev);
        devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+
+       if (pdata->cq_cnt) {
+               devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
+                             pdata->tx_ring->cp_ring);
+       }
+}
+
+static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata)
+{
+       struct napi_struct *napi;
+
+       napi = &pdata->rx_ring->napi;
+       napi_enable(napi);
+
+       if (pdata->cq_cnt) {
+               napi = &pdata->tx_ring->cp_ring->napi;
+               napi_enable(napi);
+       }
+}
+
+static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata)
+{
+       struct napi_struct *napi;
+
+       napi = &pdata->rx_ring->napi;
+       napi_disable(napi);
+
+       if (pdata->cq_cnt) {
+               napi = &pdata->tx_ring->cp_ring->napi;
+               napi_disable(napi);
+       }
 }
 
 static int xgene_enet_open(struct net_device *ndev)
@@ -462,7 +503,7 @@ static int xgene_enet_open(struct net_device *ndev)
        ret = xgene_enet_register_irq(ndev);
        if (ret)
                return ret;
-       napi_enable(&pdata->rx_ring->napi);
+       xgene_enet_napi_enable(pdata);
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
                phy_start(pdata->phy_dev);
@@ -486,7 +527,7 @@ static int xgene_enet_close(struct net_device *ndev)
        else
                cancel_delayed_work_sync(&pdata->link_work);
 
-       napi_disable(&pdata->rx_ring->napi);
+       xgene_enet_napi_disable(pdata);
        xgene_enet_free_irq(ndev);
        xgene_enet_process_ring(pdata->rx_ring, -1);
 
@@ -580,6 +621,8 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata)
        if (ring) {
                if (ring->cp_ring && ring->cp_ring->cp_skb)
                        devm_kfree(dev, ring->cp_ring->cp_skb);
+               if (ring->cp_ring && pdata->cq_cnt)
+                       xgene_enet_free_desc_ring(ring->cp_ring);
                xgene_enet_free_desc_ring(ring);
        }
 
@@ -673,6 +716,12 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
        rx_ring->nbufpool = NUM_BUFPOOL;
        rx_ring->buf_pool = buf_pool;
        rx_ring->irq = pdata->rx_irq;
+       if (!pdata->cq_cnt) {
+               snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc",
+                        ndev->name);
+       } else {
+               snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx", ndev->name);
+       }
        buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots,
                                        sizeof(struct sk_buff *), GFP_KERNEL);
        if (!buf_pool->rx_skb) {
@@ -694,7 +743,22 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
        }
        pdata->tx_ring = tx_ring;
 
-       cp_ring = pdata->rx_ring;
+       if (!pdata->cq_cnt) {
+               cp_ring = pdata->rx_ring;
+       } else {
+               /* allocate tx completion descriptor ring */
+               ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++);
+               cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++,
+                                                     RING_CFGSIZE_16KB,
+                                                     ring_id);
+               if (!cp_ring) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               cp_ring->irq = pdata->txc_irq;
+               snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc", ndev->name);
+       }
+
        cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots,
                                       sizeof(struct sk_buff *), GFP_KERNEL);
        if (!cp_ring->cp_skb) {
@@ -853,14 +917,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
                return -ENOMEM;
        }
 
-       ret = platform_get_irq(pdev, 0);
-       if (ret <= 0) {
-               dev_err(dev, "Unable to get ENET Rx IRQ\n");
-               ret = ret ? : -ENXIO;
-               return ret;
-       }
-       pdata->rx_irq = ret;
-
        ret = xgene_get_port_id(dev, pdata);
        if (ret)
                return ret;
@@ -882,6 +938,24 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
                return -ENODEV;
        }
 
+       ret = platform_get_irq(pdev, 0);
+       if (ret <= 0) {
+               dev_err(dev, "Unable to get ENET Rx IRQ\n");
+               ret = ret ? : -ENXIO;
+               return ret;
+       }
+       pdata->rx_irq = ret;
+
+       if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII) {
+               ret = platform_get_irq(pdev, 1);
+               if (ret <= 0) {
+                       dev_err(dev, "Unable to get ENET Tx completion IRQ\n");
+                       ret = ret ? : -ENXIO;
+                       return ret;
+               }
+               pdata->txc_irq = ret;
+       }
+
        pdata->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk)) {
                /* Firmware may have set up the clock already. */
@@ -950,11 +1024,13 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
                pdata->mac_ops = &xgene_sgmac_ops;
                pdata->port_ops = &xgene_sgport_ops;
                pdata->rm = RM1;
+               pdata->cq_cnt = XGENE_MAX_TXC_RINGS;
                break;
        default:
                pdata->mac_ops = &xgene_xgmac_ops;
                pdata->port_ops = &xgene_xgport_ops;
                pdata->rm = RM0;
+               pdata->cq_cnt = XGENE_MAX_TXC_RINGS;
                break;
        }
 
@@ -977,12 +1053,38 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
 
 }
 
+static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata)
+{
+       struct napi_struct *napi;
+
+       napi = &pdata->rx_ring->napi;
+       netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT);
+
+       if (pdata->cq_cnt) {
+               napi = &pdata->tx_ring->cp_ring->napi;
+               netif_napi_add(pdata->ndev, napi, xgene_enet_napi,
+                              NAPI_POLL_WEIGHT);
+       }
+}
+
+static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata)
+{
+       struct napi_struct *napi;
+
+       napi = &pdata->rx_ring->napi;
+       netif_napi_del(napi);
+
+       if (pdata->cq_cnt) {
+               napi = &pdata->tx_ring->cp_ring->napi;
+               netif_napi_del(napi);
+       }
+}
+
 static int xgene_enet_probe(struct platform_device *pdev)
 {
        struct net_device *ndev;
        struct xgene_enet_pdata *pdata;
        struct device *dev = &pdev->dev;
-       struct napi_struct *napi;
        struct xgene_mac_ops *mac_ops;
        int ret;
 
@@ -1024,8 +1126,7 @@ static int xgene_enet_probe(struct platform_device *pdev)
        if (ret)
                goto err;
 
-       napi = &pdata->rx_ring->napi;
-       netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT);
+       xgene_enet_napi_add(pdata);
        mac_ops = pdata->mac_ops;
        if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
                ret = xgene_enet_mdio_config(pdata);
@@ -1052,7 +1153,7 @@ static int xgene_enet_remove(struct platform_device *pdev)
        mac_ops->rx_disable(pdata);
        mac_ops->tx_disable(pdata);
 
-       netif_napi_del(&pdata->rx_ring->napi);
+       xgene_enet_napi_del(pdata);
        xgene_enet_mdio_remove(pdata);
        xgene_enet_delete_desc_rings(pdata);
        unregister_netdev(ndev);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index b93ed21a157f92ba32092f838be816c037e8988a..8f3d232b09bc8c5d0f321fc398aea52c498f22cc 100644
@@ -51,6 +51,9 @@
 #define START_BP_BUFNUM_1      0x2A
 #define START_RING_NUM_1       264
 
+#define IRQ_ID_SIZE            16
+#define XGENE_MAX_TXC_RINGS    1
+
 #define PHY_POLL_LINK_ON       (10 * HZ)
 #define PHY_POLL_LINK_OFF      (PHY_POLL_LINK_ON / 5)
 
@@ -63,6 +66,7 @@ struct xgene_enet_desc_ring {
        u16 tail;
        u16 slots;
        u16 irq;
+       char irq_name[IRQ_ID_SIZE];
        u32 size;
        u32 state[NUM_RING_CONFIG];
        void __iomem *cmd_base;
@@ -117,6 +121,8 @@ struct xgene_enet_pdata {
        u32 cp_qcnt_hi;
        u32 cp_qcnt_low;
        u32 rx_irq;
+       u32 txc_irq;
+       u8 cq_cnt;
        void __iomem *eth_csr_addr;
        void __iomem *eth_ring_if_addr;
        void __iomem *eth_diag_csr_addr;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 9677431c582a663eccffd85dde334f5f713bbf80..039b0c1f480ee002510e623fc6cf2ba52f6b356e 100644
@@ -12557,6 +12557,7 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb,
                                              struct net_device *dev,
                                              netdev_features_t features)
 {
+       features = vlan_features_check(skb, features);
        return vxlan_features_check(skb, features);
 }
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index c38d5429e27a7c17ae62c80d4d977ac5a4e7b6d7..31e14079e1d7988b4b9429b68c18f09477544b29 100644
@@ -964,36 +964,58 @@ static void bcmgenet_free_cb(struct enet_cb *cb)
        dma_unmap_addr_set(cb, dma_addr, 0);
 }
 
-static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_priv *priv,
-                                                 struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring)
 {
-       bcmgenet_intrl2_0_writel(priv,
+       bcmgenet_intrl2_0_writel(ring->priv,
+                                UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE,
+                                INTRL2_CPU_MASK_SET);
+}
+
+static inline void bcmgenet_rx_ring16_int_enable(struct bcmgenet_rx_ring *ring)
+{
+       bcmgenet_intrl2_0_writel(ring->priv,
+                                UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE,
+                                INTRL2_CPU_MASK_CLEAR);
+}
+
+static inline void bcmgenet_rx_ring_int_disable(struct bcmgenet_rx_ring *ring)
+{
+       bcmgenet_intrl2_1_writel(ring->priv,
+                                1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index),
+                                INTRL2_CPU_MASK_SET);
+}
+
+static inline void bcmgenet_rx_ring_int_enable(struct bcmgenet_rx_ring *ring)
+{
+       bcmgenet_intrl2_1_writel(ring->priv,
+                                1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index),
+                                INTRL2_CPU_MASK_CLEAR);
+}
+
+static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_tx_ring *ring)
+{
+       bcmgenet_intrl2_0_writel(ring->priv,
                                 UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE,
                                 INTRL2_CPU_MASK_SET);
 }
 
-static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_priv *priv,
-                                                struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_tx_ring *ring)
 {
-       bcmgenet_intrl2_0_writel(priv,
+       bcmgenet_intrl2_0_writel(ring->priv,
                                 UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE,
                                 INTRL2_CPU_MASK_CLEAR);
 }
 
-static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_priv *priv,
-                                              struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_tx_ring *ring)
 {
-       bcmgenet_intrl2_1_writel(priv, (1 << ring->index),
+       bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index,
                                 INTRL2_CPU_MASK_CLEAR);
-       priv->int1_mask &= ~(1 << ring->index);
 }
 
-static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv,
-                                               struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring)
 {
-       bcmgenet_intrl2_1_writel(priv, (1 << ring->index),
+       bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index,
                                 INTRL2_CPU_MASK_SET);
-       priv->int1_mask |= (1 << ring->index);
 }
 
 /* Unlocked version of the reclaim routine */
@@ -1085,7 +1107,7 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
 
        if (work_done == 0) {
                napi_complete(napi);
-               ring->int_enable(ring->priv, ring);
+               ring->int_enable(ring);
 
                return 0;
        }
@@ -1396,11 +1418,10 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv,
 /* bcmgenet_desc_rx - descriptor based rx process.
  * this could be called from bottom half, or from NAPI polling method.
  */
-static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
-                                    unsigned int index,
+static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
                                     unsigned int budget)
 {
-       struct bcmgenet_rx_ring *ring = &priv->rx_rings[index];
+       struct bcmgenet_priv *priv = ring->priv;
        struct net_device *dev = priv->dev;
        struct enet_cb *cb;
        struct sk_buff *skb;
@@ -1412,7 +1433,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
        unsigned int discards;
        unsigned int chksum_ok = 0;
 
-       p_index = bcmgenet_rdma_ring_readl(priv, index, RDMA_PROD_INDEX);
+       p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX);
 
        discards = (p_index >> DMA_P_INDEX_DISCARD_CNT_SHIFT) &
                   DMA_P_INDEX_DISCARD_CNT_MASK;
@@ -1425,7 +1446,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
                /* Clear HW register when we reach 75% of maximum 0xFFFF */
                if (ring->old_discards >= 0xC000) {
                        ring->old_discards = 0;
-                       bcmgenet_rdma_ring_writel(priv, index, 0,
+                       bcmgenet_rdma_ring_writel(priv, ring->index, 0,
                                                  RDMA_PROD_INDEX);
                }
        }
@@ -1533,7 +1554,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
                        dev->stats.multicast++;
 
                /* Notify kernel */
-               napi_gro_receive(&priv->napi, skb);
+               napi_gro_receive(&ring->napi, skb);
                netif_dbg(priv, rx_status, dev, "pushed up to kernel\n");
 
 next:
@@ -1544,12 +1565,29 @@ next:
                        ring->read_ptr = ring->cb_ptr;
 
                ring->c_index = (ring->c_index + 1) & DMA_C_INDEX_MASK;
-               bcmgenet_rdma_ring_writel(priv, index, ring->c_index, RDMA_CONS_INDEX);
+               bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
        }
 
        return rxpktprocessed;
 }
 
+/* Rx NAPI polling method */
+static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
+{
+       struct bcmgenet_rx_ring *ring = container_of(napi,
+                       struct bcmgenet_rx_ring, napi);
+       unsigned int work_done;
+
+       work_done = bcmgenet_desc_rx(ring, budget);
+
+       if (work_done < budget) {
+               napi_complete(napi);
+               ring->int_enable(ring);
+       }
+
+       return work_done;
+}
+
 /* Assign skb to RX DMA descriptor. */
 static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
                                     struct bcmgenet_rx_ring *ring)
@@ -1658,8 +1696,10 @@ static int init_umac(struct bcmgenet_priv *priv)
 {
        struct device *kdev = &priv->pdev->dev;
        int ret;
-       u32 reg, cpu_mask_clear;
-       int index;
+       u32 reg;
+       u32 int0_enable = 0;
+       u32 int1_enable = 0;
+       int i;
 
        dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n");
 
@@ -1686,15 +1726,17 @@ static int init_umac(struct bcmgenet_priv *priv)
 
        bcmgenet_intr_disable(priv);
 
-       cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
+       /* Enable Rx default queue 16 interrupts */
+       int0_enable |= (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE);
 
-       dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__);
+       /* Enable Tx default queue 16 interrupts */
+       int0_enable |= (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE);
 
        /* Monitor cable plug/unplugged event for internal PHY */
        if (phy_is_internal(priv->phydev)) {
-               cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
+               int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
        } else if (priv->ext_phy) {
-               cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
+               int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
        } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
                reg = bcmgenet_bp_mc_get(priv);
                reg |= BIT(priv->hw_params->bp_in_en_shift);
@@ -1709,13 +1751,18 @@ static int init_umac(struct bcmgenet_priv *priv)
 
        /* Enable MDIO interrupts on GENET v3+ */
        if (priv->hw_params->flags & GENET_HAS_MDIO_INTR)
-               cpu_mask_clear |= UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR;
+               int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+
+       /* Enable Rx priority queue interrupts */
+       for (i = 0; i < priv->hw_params->rx_queues; ++i)
+               int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i));
 
-       bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR);
+       /* Enable Tx priority queue interrupts */
+       for (i = 0; i < priv->hw_params->tx_queues; ++i)
+               int1_enable |= (1 << i);
 
-       for (index = 0; index < priv->hw_params->tx_queues; index++)
-               bcmgenet_intrl2_1_writel(priv, (1 << index),
-                                        INTRL2_CPU_MASK_CLEAR);
+       bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
+       bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
 
        /* Enable rx/tx engine.*/
        dev_dbg(kdev, "done init umac\n");
@@ -1734,7 +1781,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
 
        spin_lock_init(&ring->lock);
        ring->priv = priv;
-       netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
        ring->index = index;
        if (index == DESC_INDEX) {
                ring->queue = 0;
@@ -1778,17 +1824,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
                                  TDMA_WRITE_PTR);
        bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
                                  DMA_END_ADDR);
-
-       napi_enable(&ring->napi);
-}
-
-static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv,
-                                 unsigned int index)
-{
-       struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
-
-       napi_disable(&ring->napi);
-       netif_napi_del(&ring->napi);
 }
 
 /* Initialize a RDMA ring */
@@ -1800,7 +1835,15 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
        u32 words_per_bd = WORDS_PER_BD(priv);
        int ret;
 
+       ring->priv = priv;
        ring->index = index;
+       if (index == DESC_INDEX) {
+               ring->int_enable = bcmgenet_rx_ring16_int_enable;
+               ring->int_disable = bcmgenet_rx_ring16_int_disable;
+       } else {
+               ring->int_enable = bcmgenet_rx_ring_int_enable;
+               ring->int_disable = bcmgenet_rx_ring_int_disable;
+       }
        ring->cbs = priv->rx_cbs + start_ptr;
        ring->size = size;
        ring->c_index = 0;
@@ -1836,6 +1879,62 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
        return ret;
 }
 
+static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_tx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+               ring = &priv->tx_rings[i];
+               netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
+       }
+
+       ring = &priv->tx_rings[DESC_INDEX];
+       netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
+}
+
+static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_tx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+               ring = &priv->tx_rings[i];
+               napi_enable(&ring->napi);
+       }
+
+       ring = &priv->tx_rings[DESC_INDEX];
+       napi_enable(&ring->napi);
+}
+
+static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_tx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+               ring = &priv->tx_rings[i];
+               napi_disable(&ring->napi);
+       }
+
+       ring = &priv->tx_rings[DESC_INDEX];
+       napi_disable(&ring->napi);
+}
+
+static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_tx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+               ring = &priv->tx_rings[i];
+               netif_napi_del(&ring->napi);
+       }
+
+       ring = &priv->tx_rings[DESC_INDEX];
+       netif_napi_del(&ring->napi);
+}
+
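These per-ring helpers replace the old single priv->napi instance; later hunks in this patch give each helper a matching call site across the interface lifetime. A condensed sketch of the pairing, with the call sites taken from the hunks that follow:

	bcmgenet_init_tx_napi(priv);    /* queue init path: netif_napi_add() */
	bcmgenet_enable_tx_napi(priv);  /* netif_start:     napi_enable()    */
	bcmgenet_disable_tx_napi(priv); /* netif_stop:      napi_disable()   */
	bcmgenet_fini_tx_napi(priv);    /* fini_dma path:   netif_napi_del() */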
 /* Initialize Tx queues
  *
  * Queues 0-3 are priority-based, each one has 32 descriptors,
@@ -1896,6 +1995,9 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
        bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1);
        bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2);
 
+       /* Initialize Tx NAPI */
+       bcmgenet_init_tx_napi(priv);
+
        /* Enable Tx queues */
        bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG);
 
@@ -1905,6 +2007,62 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
        bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL);
 }
 
+static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_rx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+               ring = &priv->rx_rings[i];
+               netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
+       }
+
+       ring = &priv->rx_rings[DESC_INDEX];
+       netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
+}
+
+static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_rx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+               ring = &priv->rx_rings[i];
+               napi_enable(&ring->napi);
+       }
+
+       ring = &priv->rx_rings[DESC_INDEX];
+       napi_enable(&ring->napi);
+}
+
+static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_rx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+               ring = &priv->rx_rings[i];
+               napi_disable(&ring->napi);
+       }
+
+       ring = &priv->rx_rings[DESC_INDEX];
+       napi_disable(&ring->napi);
+}
+
+static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
+{
+       unsigned int i;
+       struct bcmgenet_rx_ring *ring;
+
+       for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+               ring = &priv->rx_rings[i];
+               netif_napi_del(&ring->napi);
+       }
+
+       ring = &priv->rx_rings[DESC_INDEX];
+       netif_napi_del(&ring->napi);
+}
+
 /* Initialize Rx queues
  *
  * Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be
@@ -1954,6 +2112,9 @@ static int bcmgenet_init_rx_queues(struct net_device *dev)
        ring_cfg |= (1 << DESC_INDEX);
        dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT));
 
+       /* Initialize Rx NAPI */
+       bcmgenet_init_rx_napi(priv);
+
        /* Enable rings */
        bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG);
 
@@ -2037,12 +2198,8 @@ static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 
 static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 {
-       int i;
-
-       bcmgenet_fini_tx_ring(priv, DESC_INDEX);
-
-       for (i = 0; i < priv->hw_params->tx_queues; i++)
-               bcmgenet_fini_tx_ring(priv, i);
+       bcmgenet_fini_rx_napi(priv);
+       bcmgenet_fini_tx_napi(priv);
 
        __bcmgenet_fini_dma(priv);
 }
@@ -2056,9 +2213,6 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
 
        netif_dbg(priv, hw, priv->dev, "%s\n", __func__);
 
-       /* Init rDma */
-       bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
-
        /* Initialize common Rx ring structures */
        priv->rx_bds = priv->base + priv->hw_params->rdma_offset;
        priv->num_rx_bds = TOTAL_DESC;
@@ -2072,25 +2226,13 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
                cb->bd_addr = priv->rx_bds + i * DMA_DESC_SIZE;
        }
 
-       /* Initialize Rx queues */
-       ret = bcmgenet_init_rx_queues(priv->dev);
-       if (ret) {
-               netdev_err(priv->dev, "failed to initialize Rx queues\n");
-               bcmgenet_free_rx_buffers(priv);
-               kfree(priv->rx_cbs);
-               return ret;
-       }
-
-       /* Init tDma */
-       bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
-
        /* Initialize common TX ring structures */
        priv->tx_bds = priv->base + priv->hw_params->tdma_offset;
        priv->num_tx_bds = TOTAL_DESC;
        priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb),
                               GFP_KERNEL);
        if (!priv->tx_cbs) {
-               __bcmgenet_fini_dma(priv);
+               kfree(priv->rx_cbs);
                return -ENOMEM;
        }
 
@@ -2099,28 +2241,26 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
                cb->bd_addr = priv->tx_bds + i * DMA_DESC_SIZE;
        }
 
-       /* Initialize Tx queues */
-       bcmgenet_init_tx_queues(priv->dev);
-
-       return 0;
-}
+       /* Init rDma */
+       bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
 
-/* NAPI polling method*/
-static int bcmgenet_poll(struct napi_struct *napi, int budget)
-{
-       struct bcmgenet_priv *priv = container_of(napi,
-                       struct bcmgenet_priv, napi);
-       unsigned int work_done;
+       /* Initialize Rx queues */
+       ret = bcmgenet_init_rx_queues(priv->dev);
+       if (ret) {
+               netdev_err(priv->dev, "failed to initialize Rx queues\n");
+               bcmgenet_free_rx_buffers(priv);
+               kfree(priv->rx_cbs);
+               kfree(priv->tx_cbs);
+               return ret;
+       }
 
-       work_done = bcmgenet_desc_rx(priv, DESC_INDEX, budget);
+       /* Init tDma */
+       bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
 
-       if (work_done < budget) {
-               napi_complete(napi);
-               bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE,
-                                        INTRL2_CPU_MASK_CLEAR);
-       }
+       /* Initialize Tx queues */
+       bcmgenet_init_tx_queues(priv->dev);
 
-       return work_done;
+       return 0;
 }
 
 /* Interrupt bottom half */
@@ -2147,50 +2287,66 @@ static void bcmgenet_irq_task(struct work_struct *work)
        }
 }
 
-/* bcmgenet_isr1: interrupt handler for ring buffer. */
+/* bcmgenet_isr1: handle Rx and Tx priority queues */
 static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
 {
        struct bcmgenet_priv *priv = dev_id;
-       struct bcmgenet_tx_ring *ring;
+       struct bcmgenet_rx_ring *rx_ring;
+       struct bcmgenet_tx_ring *tx_ring;
        unsigned int index;
 
        /* Save irq status for bottom-half processing. */
        priv->irq1_stat =
                bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
                ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
+
        /* clear interrupts */
        bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
 
        netif_dbg(priv, intr, priv->dev,
                  "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
 
-       /* Check the MBDONE interrupts.
-        * packet is done, reclaim descriptors
-        */
+       /* Check Rx priority queue interrupts */
+       for (index = 0; index < priv->hw_params->rx_queues; index++) {
+               if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+                       continue;
+
+               rx_ring = &priv->rx_rings[index];
+
+               if (likely(napi_schedule_prep(&rx_ring->napi))) {
+                       rx_ring->int_disable(rx_ring);
+                       __napi_schedule(&rx_ring->napi);
+               }
+       }
+
+       /* Check Tx priority queue interrupts */
        for (index = 0; index < priv->hw_params->tx_queues; index++) {
                if (!(priv->irq1_stat & BIT(index)))
                        continue;
 
-               ring = &priv->tx_rings[index];
+               tx_ring = &priv->tx_rings[index];
 
-               if (likely(napi_schedule_prep(&ring->napi))) {
-                       ring->int_disable(priv, ring);
-                       __napi_schedule(&ring->napi);
+               if (likely(napi_schedule_prep(&tx_ring->napi))) {
+                       tx_ring->int_disable(tx_ring);
+                       __napi_schedule(&tx_ring->napi);
                }
        }
 
        return IRQ_HANDLED;
 }
 
-/* bcmgenet_isr0: Handle various interrupts. */
+/* bcmgenet_isr0: handle Rx and Tx default queues and other interrupts */
 static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 {
        struct bcmgenet_priv *priv = dev_id;
+       struct bcmgenet_rx_ring *rx_ring;
+       struct bcmgenet_tx_ring *tx_ring;
 
        /* Save irq status for bottom-half processing. */
        priv->irq0_stat =
                bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
                ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
+
        /* clear interrupts */
        bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
 
@@ -2198,25 +2354,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                  "IRQ=0x%x\n", priv->irq0_stat);
 
        if (priv->irq0_stat & (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE)) {
-               /* We use NAPI(software interrupt throttling, if
-                * Rx Descriptor throttling is not used.
-                * Disable interrupt, will be enabled in the poll method.
-                */
-               if (likely(napi_schedule_prep(&priv->napi))) {
-                       bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE,
-                                                INTRL2_CPU_MASK_SET);
-                       __napi_schedule(&priv->napi);
+               rx_ring = &priv->rx_rings[DESC_INDEX];
+
+               if (likely(napi_schedule_prep(&rx_ring->napi))) {
+                       rx_ring->int_disable(rx_ring);
+                       __napi_schedule(&rx_ring->napi);
                }
        }
-       if (priv->irq0_stat &
-                       (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
-               struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX];
 
-               if (likely(napi_schedule_prep(&ring->napi))) {
-                       ring->int_disable(priv, ring);
-                       __napi_schedule(&ring->napi);
+       if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
+               tx_ring = &priv->tx_rings[DESC_INDEX];
+
+               if (likely(napi_schedule_prep(&tx_ring->napi))) {
+                       tx_ring->int_disable(tx_ring);
+                       __napi_schedule(&tx_ring->napi);
                }
        }
+
        if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
                                UMAC_IRQ_PHY_DET_F |
                                UMAC_IRQ_LINK_UP |
@@ -2463,7 +2617,8 @@ static void bcmgenet_netif_start(struct net_device *dev)
        struct bcmgenet_priv *priv = netdev_priv(dev);
 
        /* Start the network engine */
-       napi_enable(&priv->napi);
+       bcmgenet_enable_rx_napi(priv);
+       bcmgenet_enable_tx_napi(priv);
 
        umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
 
@@ -2568,10 +2723,10 @@ static void bcmgenet_netif_stop(struct net_device *dev)
        struct bcmgenet_priv *priv = netdev_priv(dev);
 
        netif_tx_stop_all_queues(dev);
-       napi_disable(&priv->napi);
        phy_stop(priv->phydev);
-
        bcmgenet_intr_disable(priv);
+       bcmgenet_disable_rx_napi(priv);
+       bcmgenet_disable_tx_napi(priv);
 
        /* Wait for pending work items to complete. Since interrupts are
         * disabled no new work will be scheduled.
@@ -2972,7 +3127,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
        dev->watchdog_timeo = 2 * HZ;
        dev->ethtool_ops = &bcmgenet_ethtool_ops;
        dev->netdev_ops = &bcmgenet_netdev_ops;
-       netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64);
 
        priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT);
 
index 7a59879d441f3c989483fdddab874d27bdff3b50..a834da1dfe4c36d0257a25dafceba071fcb4900f 100644 (file)
@@ -310,6 +310,11 @@ struct bcmgenet_mib_counters {
 #define UMAC_IRQ_MDIO_DONE             (1 << 23)
 #define UMAC_IRQ_MDIO_ERROR            (1 << 24)
 
+/* INTRL2 instance 1 definitions */
+#define UMAC_IRQ1_TX_INTR_MASK         0xFFFF
+#define UMAC_IRQ1_RX_INTR_MASK         0xFFFF
+#define UMAC_IRQ1_RX_INTR_SHIFT                16
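These definitions describe the INTRL2 instance 1 layout consumed by bcmgenet_isr1() above: Tx ring interrupts sit in bits 0-15, Rx ring interrupts in bits 16-31. A minimal sketch of building the per-queue enable masks under that layout (illustrative only; the driver builds them bit by bit):

	u32 tx_mask = (1 << priv->hw_params->tx_queues) - 1;      /* 4 queues -> 0x0000000f */
	u32 rx_mask = ((1 << priv->hw_params->rx_queues) - 1)
			<< UMAC_IRQ1_RX_INTR_SHIFT;                /* 3 queues -> 0x00070000 */

	bcmgenet_intrl2_1_writel(priv, tx_mask | rx_mask, INTRL2_CPU_MASK_CLEAR);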
+
 /* Register block offsets */
 #define GENET_SYS_OFF                  0x0000
 #define GENET_GR_BRIDGE_OFF            0x0040
@@ -535,14 +540,13 @@ struct bcmgenet_tx_ring {
        unsigned int    prod_index;     /* Tx ring producer index SW copy */
        unsigned int    cb_ptr;         /* Tx ring initial CB ptr */
        unsigned int    end_ptr;        /* Tx ring end CB ptr */
-       void (*int_enable)(struct bcmgenet_priv *priv,
-                          struct bcmgenet_tx_ring *);
-       void (*int_disable)(struct bcmgenet_priv *priv,
-                           struct bcmgenet_tx_ring *);
+       void (*int_enable)(struct bcmgenet_tx_ring *);
+       void (*int_disable)(struct bcmgenet_tx_ring *);
        struct bcmgenet_priv *priv;
 };
 
 struct bcmgenet_rx_ring {
+       struct napi_struct napi;        /* Rx NAPI struct */
        unsigned int    index;          /* Rx ring index */
        struct enet_cb  *cbs;           /* Rx ring buffer control block */
        unsigned int    size;           /* Rx ring size */
@@ -551,6 +555,9 @@ struct bcmgenet_rx_ring {
        unsigned int    cb_ptr;         /* Rx ring initial CB ptr */
        unsigned int    end_ptr;        /* Rx ring end CB ptr */
        unsigned int    old_discards;
+       void (*int_enable)(struct bcmgenet_rx_ring *);
+       void (*int_disable)(struct bcmgenet_rx_ring *);
+       struct bcmgenet_priv *priv;
 };
 
 /* device context */
@@ -558,11 +565,6 @@ struct bcmgenet_priv {
        void __iomem *base;
        enum bcmgenet_version version;
        struct net_device *dev;
-       u32 int0_mask;
-       u32 int1_mask;
-
-       /* NAPI for descriptor based rx */
-       struct napi_struct napi ____cacheline_aligned;
 
        /* transmit variables */
        void __iomem *tx_bds;
index 062d3c0b5818e950cfde59dca995407ea329f4e4..6c8a62eefe5148ff8f5fc2743cfc8ff2988475eb 100644 (file)
@@ -46,17 +46,17 @@ bool cxgb_fcoe_sof_eof_supported(struct adapter *adap, struct sk_buff *skb)
 
        if ((sof != FC_SOF_I3) && (sof != FC_SOF_N3)) {
                dev_err(adap->pdev_dev, "Unsupported SOF 0x%x\n", sof);
-               return 0;
+               return false;
        }
 
        skb_copy_bits(skb, skb->len - 4, &eof, 1);
 
        if ((eof != FC_EOF_N) && (eof != FC_EOF_T)) {
                dev_err(adap->pdev_dev, "Unsupported EOF 0x%x\n", eof);
-               return 0;
+               return false;
        }
 
-       return 1;
+       return true;
 }
 
 /**
index eb39673ed6a6b4c3a52f7ba487fa8538b4763a37..4b0494b9cc7cf034e8ebdc190d08e46a8a1e790e 100644 (file)
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
+#include <linux/cpumask.h>
 
 #include "be_hw.h"
 #include "be_roce.h"
 
-#define DRV_VER                        "10.4u"
+#define DRV_VER                        "10.6.0.1"
 #define DRV_NAME               "be2net"
 #define BE_NAME                        "Emulex BladeEngine2"
 #define BE3_NAME               "Emulex BladeEngine3"
@@ -183,6 +184,7 @@ struct be_eq_obj {
        u16 spurious_intr;
        struct napi_struct napi;
        struct be_adapter *adapter;
+       cpumask_var_t  affinity_mask;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 #define BE_EQ_IDLE             0
index d8df78b6554d03176a7b4f3fd00eb30c6bd76e6a..5ff7fba9b67c9d39043d1094193db714f7625a6b 100644 (file)
@@ -2342,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter)
                        napi_hash_del(&eqo->napi);
                        netif_napi_del(&eqo->napi);
                }
+               free_cpumask_var(eqo->affinity_mask);
                be_queue_free(adapter, &eqo->q);
        }
 }
@@ -2357,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
                                    adapter->cfg_num_qs);
 
        for_all_evt_queues(adapter, eqo, i) {
+               if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
+                       return -ENOMEM;
+               cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
+                                           eqo->affinity_mask);
+
                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
                               BE_NAPI_WEIGHT);
                napi_hash_add(&eqo->napi);
@@ -2448,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter)
 
 static int be_tx_qs_create(struct be_adapter *adapter)
 {
-       struct be_queue_info *cq, *eq;
+       struct be_queue_info *cq;
        struct be_tx_obj *txo;
+       struct be_eq_obj *eqo;
        int status, i;
 
        adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
@@ -2467,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter)
                /* If num_evt_qs is less than num_tx_qs, then more than
                 * one txq share an eq
                 */
-               eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
-               status = be_cmd_cq_create(adapter, cq, eq, false, 3);
+               eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
+               status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
                if (status)
                        return status;
 
@@ -2480,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter)
                status = be_cmd_txq_create(adapter, txo);
                if (status)
                        return status;
+
+               netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
+                                   eqo->idx);
        }
 
        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
@@ -3028,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter)
                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
                if (status)
                        goto err_msix;
+
+               irq_set_affinity_hint(vec, eqo->affinity_mask);
        }
 
        return 0;
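Taken together, the be2net hunks pair every affinity resource with a release across the event-queue lifetime. A condensed sketch of the ordering (calls lifted from the surrounding hunks; error handling elided):

	/* be_evt_queues_create() */
	zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL);
	cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
				    eqo->affinity_mask);
	/* be_tx_qs_create() */
	netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, eqo->idx);
	/* be_msix_register() */
	irq_set_affinity_hint(vec, eqo->affinity_mask);
	/* be_irq_unregister() */
	irq_set_affinity_hint(vec, NULL);
	free_irq(vec, eqo);
	/* be_evt_queues_destroy() */
	free_cpumask_var(eqo->affinity_mask);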
@@ -3072,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
        struct be_eq_obj *eqo;
-       int i;
+       int i, vec;
 
        if (!adapter->isr_registered)
                return;
@@ -3084,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter)
        }
 
        /* MSIx */
-       for_all_evt_queues(adapter, eqo, i)
-               free_irq(be_msix_vec_get(adapter, eqo), eqo);
+       for_all_evt_queues(adapter, eqo, i) {
+               vec = be_msix_vec_get(adapter, eqo);
+               irq_set_affinity_hint(vec, NULL);
+               free_irq(vec, eqo);
+       }
 
 done:
        adapter->isr_registered = false;
index 0e07545ccc97c9727486afe4b7f9f9520fdd7ff0..8457d0306e3a76107c18ed524a3000d47b3ead6e 100644 (file)
@@ -2093,6 +2093,7 @@ static const struct net_device_ops igb_netdev_ops = {
 #endif
        .ndo_fix_features       = igb_fix_features,
        .ndo_set_features       = igb_set_features,
+       .ndo_features_check     = passthru_features_check,
 };
 
 /**
index a8339e98ad24ade71ff69f49ff88135221dca079..ebc93a101c9376c56c3ae41c2c03d3787b43c0bb 100644 (file)
@@ -2373,6 +2373,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
                                                struct net_device *dev,
                                                netdev_features_t features)
 {
+       features = vlan_features_check(skb, features);
        return vxlan_features_check(skb, features);
 }
 #endif
index a430a34a4434aa78a87cb43d90c805fa83ac8252..367f3976df5690d71ba845214423296913aa07dd 100644 (file)
@@ -507,6 +507,7 @@ static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
                                               struct net_device *dev,
                                               netdev_features_t features)
 {
+       features = vlan_features_check(skb, features);
        return vxlan_features_check(skb, features);
 }
 #endif
index 8678e39aba08cfe0d5b3b0578348b258812ec4ec..5a1a3e7c93e7952adcd3a75d6db1c3afb75d9f42 100644 (file)
@@ -2204,27 +2204,18 @@ static int try_toggle_control_gpio(struct device *dev,
                                   int value, unsigned int nsdelay)
 {
        struct gpio_desc *gpio = *desc;
+       enum gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH;
-       int res;
 
-       gpio = devm_gpiod_get_index(dev, name, index);
-       if (IS_ERR(gpio)) {
-               if (PTR_ERR(gpio) == -ENOENT) {
-                       *desc = NULL;
-                       return 0;
-               }
-
+       gpio = devm_gpiod_get_index_optional(dev, name, index, flags);
+       if (IS_ERR(gpio))
                return PTR_ERR(gpio);
+
+       if (gpio) {
+               if (nsdelay)
+                       usleep_range(nsdelay, 2 * nsdelay);
+               gpiod_set_value_cansleep(gpio, value);
        }
-       res = gpiod_direction_output(gpio, !value);
-       if (res) {
-               dev_err(dev, "unable to toggle gpio %s: %i\n", name, res);
-               devm_gpiod_put(dev, gpio);
-               gpio = NULL;
-               return res;
-       }
-       if (nsdelay)
-               usleep_range(nsdelay, 2 * nsdelay);
-       gpiod_set_value_cansleep(gpio, value);
        *desc = gpio;
 
        return 0;
index 4815843a6019a7bf8181a3298119d2ce8b362676..384f057d65701a4688aa32a727655243cc9c84d0 100644 (file)
@@ -131,6 +131,7 @@ struct hv_netvsc_packet {
 
        struct hv_device *device;
        bool is_data_pkt;
+       bool xmit_more; /* from skb */
        u16 vlan_tci;
 
        u16 q_idx;
@@ -596,7 +597,16 @@ struct nvsp_message {
 
 #define VRSS_SEND_TAB_SIZE 16
 
-/* Per netvsc channel-specific */
+#define RNDIS_MAX_PKT_DEFAULT 8
+#define RNDIS_PKT_ALIGN_DEFAULT 8
+
+struct multi_send_data {
+       spinlock_t lock; /* protect struct multi_send_data */
+       struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
+       u32 count; /* counter of batched packets */
+};
+
+/* Per netvsc device */
 struct netvsc_device {
        struct hv_device *dev;
 
@@ -647,6 +657,10 @@ struct netvsc_device {
        unsigned char *cb_buffer;
        /* The sub channel callback buffer */
        unsigned char *sub_cb_buf;
+
+       struct multi_send_data msd[NR_CPUS];
+       u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
+       u32 pkt_align; /* alignment bytes, e.g. 8 */
 };
 
 /* NdisInitialize message */
index 208eb05446baa4a6980620773865e3746a13848f..b81bd37d3afbb1b8a6e3048cf0c7bd71f87a7afa 100644 (file)
@@ -37,6 +37,7 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device)
 {
        struct netvsc_device *net_device;
        struct net_device *ndev = hv_get_drvdata(device);
+       int i;
 
        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
        if (!net_device)
@@ -53,6 +54,11 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device)
        net_device->destroy = false;
        net_device->dev = device;
        net_device->ndev = ndev;
+       net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
+       net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
+       for (i = 0; i < num_online_cpus(); i++)
+               spin_lock_init(&net_device->msd[i].lock);
 
        hv_set_drvdata(device, net_device);
        return net_device;
@@ -687,12 +693,23 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 
 static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                                   unsigned int section_index,
+                                  u32 pend_size,
                                   struct hv_netvsc_packet *packet)
 {
        char *start = net_device->send_buf;
-       char *dest = (start + (section_index * net_device->send_section_size));
+       char *dest = start + (section_index * net_device->send_section_size)
+                    + pend_size;
        int i;
        u32 msg_size = 0;
+       u32 padding = 0;
+       u32 remain = packet->total_data_buflen % net_device->pkt_align;
+
+       /* Add padding */
+       if (packet->is_data_pkt && packet->xmit_more && remain) {
+               padding = net_device->pkt_align - remain;
+               packet->rndis_msg->msg_len += padding;
+               packet->total_data_buflen += padding;
+       }
 
        for (i = 0; i < packet->page_buf_cnt; i++) {
                char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
@@ -703,67 +720,48 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                msg_size += len;
                dest += len;
        }
+
+       if (padding) {
+               memset(dest, 0, padding);
+               msg_size += padding;
+       }
+
        return msg_size;
 }
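With the defaults above (pkt_align = 8; the rndis_filter hunk later in this series replaces both defaults with host-negotiated values), a 1514-byte frame leaves remain = 1514 % 8 = 2, so 6 bytes of zero fill are appended whenever more traffic is pending (xmit_more). The same arithmetic as a standalone sketch:

	u32 align   = 8;                            /* net_device->pkt_align     */
	u32 buflen  = 1514;                         /* packet->total_data_buflen */
	u32 remain  = buflen % align;               /* 2                         */
	u32 padding = remain ? align - remain : 0;  /* 6 bytes of zeros          */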
 
-int netvsc_send(struct hv_device *device,
-                       struct hv_netvsc_packet *packet)
+static inline int netvsc_send_pkt(
+       struct hv_netvsc_packet *packet,
+       struct netvsc_device *net_device)
 {
-       struct netvsc_device *net_device;
-       int ret = 0;
-       struct nvsp_message sendMessage;
-       struct net_device *ndev;
-       struct vmbus_channel *out_channel = NULL;
-       u64 req_id;
-       unsigned int section_index = NETVSC_INVALID_INDEX;
-       u32 msg_size = 0;
-       struct sk_buff *skb = NULL;
+       struct nvsp_message nvmsg;
+       struct vmbus_channel *out_channel = packet->channel;
        u16 q_idx = packet->q_idx;
+       struct net_device *ndev = net_device->ndev;
+       u64 req_id;
+       int ret;
 
-
-       net_device = get_outbound_net_device(device);
-       if (!net_device)
-               return -ENODEV;
-       ndev = net_device->ndev;
-
-       sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
+       nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (packet->is_data_pkt) {
                /* 0 is RMC_DATA; */
-               sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
+               nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
        } else {
                /* 1 is RMC_CONTROL; */
-               sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
+               nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
        }
 
-       /* Attempt to send via sendbuf */
-       if (packet->total_data_buflen < net_device->send_section_size) {
-               section_index = netvsc_get_next_send_section(net_device);
-               if (section_index != NETVSC_INVALID_INDEX) {
-                       msg_size = netvsc_copy_to_send_buf(net_device,
-                                                          section_index,
-                                                          packet);
-                       skb = (struct sk_buff *)
-                             (unsigned long)packet->send_completion_tid;
-                       packet->page_buf_cnt = 0;
-               }
-       }
-       packet->send_buf_index = section_index;
-
-
-       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
-               section_index;
-       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
+       nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
+               packet->send_buf_index;
+       if (packet->send_buf_index == NETVSC_INVALID_INDEX)
+               nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+       else
+               nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
+                       packet->total_data_buflen;
 
        if (packet->send_completion)
                req_id = (ulong)packet;
        else
                req_id = 0;
 
-       out_channel = net_device->chn_table[packet->q_idx];
-       if (out_channel == NULL)
-               out_channel = device->channel;
-       packet->channel = out_channel;
-
        if (out_channel->rescind)
                return -ENODEV;
 
@@ -771,11 +769,12 @@ int netvsc_send(struct hv_device *device,
                ret = vmbus_sendpacket_pagebuffer(out_channel,
                                                  packet->page_buf,
                                                  packet->page_buf_cnt,
-                                                 &sendMessage,
+                                                 &nvmsg,
                                                  sizeof(struct nvsp_message),
                                                  req_id);
        } else {
-               ret = vmbus_sendpacket(out_channel, &sendMessage,
+               ret = vmbus_sendpacket(
+                               out_channel, &nvmsg,
                                sizeof(struct nvsp_message),
                                req_id,
                                VM_PKT_DATA_INBAND,
@@ -809,6 +808,102 @@ int netvsc_send(struct hv_device *device,
                           packet, ret);
        }
 
+       return ret;
+}
+
+int netvsc_send(struct hv_device *device,
+               struct hv_netvsc_packet *packet)
+{
+       struct netvsc_device *net_device;
+       int ret = 0, m_ret = 0;
+       struct vmbus_channel *out_channel;
+       u16 q_idx = packet->q_idx;
+       u32 pktlen = packet->total_data_buflen, msd_len = 0;
+       unsigned int section_index = NETVSC_INVALID_INDEX;
+       struct sk_buff *skb = NULL;
+       unsigned long flag;
+       struct multi_send_data *msdp;
+       struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+
+       net_device = get_outbound_net_device(device);
+       if (!net_device)
+               return -ENODEV;
+
+       out_channel = net_device->chn_table[q_idx];
+       if (!out_channel) {
+               out_channel = device->channel;
+               q_idx = 0;
+               packet->q_idx = 0;
+       }
+       packet->channel = out_channel;
+       packet->send_buf_index = NETVSC_INVALID_INDEX;
+
+       msdp = &net_device->msd[q_idx];
+
+       /* batch packets in send buffer if possible */
+       spin_lock_irqsave(&msdp->lock, flag);
+       if (msdp->pkt)
+               msd_len = msdp->pkt->total_data_buflen;
+
+       if (packet->is_data_pkt && msd_len > 0 &&
+           msdp->count < net_device->max_pkt &&
+           msd_len + pktlen + net_device->pkt_align <
+           net_device->send_section_size) {
+               section_index = msdp->pkt->send_buf_index;
+
+       } else if (packet->is_data_pkt && pktlen + net_device->pkt_align <
+                  net_device->send_section_size) {
+               section_index = netvsc_get_next_send_section(net_device);
+               if (section_index != NETVSC_INVALID_INDEX) {
+                       msd_send = msdp->pkt;
+                       msdp->pkt = NULL;
+                       msdp->count = 0;
+                       msd_len = 0;
+               }
+       }
+
+       if (section_index != NETVSC_INVALID_INDEX) {
+               netvsc_copy_to_send_buf(net_device,
+                                       section_index, msd_len,
+                                       packet);
+               skb = (struct sk_buff *)
+                      (unsigned long)packet->send_completion_tid;
+
+               packet->page_buf_cnt = 0;
+               packet->send_buf_index = section_index;
+               packet->total_data_buflen += msd_len;
+
+               kfree(msdp->pkt);
+               if (packet->xmit_more) {
+                       msdp->pkt = packet;
+                       msdp->count++;
+               } else {
+                       cur_send = packet;
+                       msdp->pkt = NULL;
+                       msdp->count = 0;
+               }
+       } else {
+               msd_send = msdp->pkt;
+               msdp->pkt = NULL;
+               msdp->count = 0;
+               cur_send = packet;
+       }
+
+       spin_unlock_irqrestore(&msdp->lock, flag);
+
+       if (msd_send) {
+               m_ret = netvsc_send_pkt(msd_send, net_device);
+
+               if (m_ret != 0) {
+                       netvsc_free_send_slot(net_device,
+                                             msd_send->send_buf_index);
+                       kfree(msd_send);
+               }
+       }
+
+       if (cur_send)
+               ret = netvsc_send_pkt(cur_send, net_device);
+
        if (ret != 0) {
                if (section_index != NETVSC_INVALID_INDEX)
                        netvsc_free_send_slot(net_device, section_index);
index a06bd6614007f7460f546951c70fd8cacbe7e6f9..0c998186039e43e4fece13ea4d4fe53cd8dbeb19 100644 (file)
@@ -413,6 +413,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
                return NETDEV_TX_OK;
        }
 
+       packet->xmit_more = skb->xmit_more;
+
        packet->vlan_tci = skb->vlan_tci;
 
        packet->q_idx = skb_get_queue_mapping(skb);
index ca81de04bc76294c24e33d32dea4c8b1e3c8a613..fdfab1feccfc7dedcd274c1b626b1754a1aee719 100644 (file)
@@ -237,6 +237,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
        }
 
        packet->send_completion = NULL;
+       packet->xmit_more = false;
 
        ret = netvsc_send(dev->net_dev->dev, packet);
        return ret;
@@ -855,6 +856,7 @@ static int rndis_filter_init_device(struct rndis_device *dev)
        u32 status;
        int ret;
        unsigned long t;
+       struct netvsc_device *nvdev = dev->net_dev;
 
        request = get_rndis_request(dev, RNDIS_MSG_INIT,
                        RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@ -889,6 +891,8 @@ static int rndis_filter_init_device(struct rndis_device *dev)
        status = init_complete->status;
        if (status == RNDIS_STATUS_SUCCESS) {
                dev->state = RNDIS_DEV_INITIALIZED;
+               nvdev->max_pkt = init_complete->max_pkt_per_msg;
+               nvdev->pkt_align = 1 << init_complete->pkt_alignment_factor;
                ret = 0;
        } else {
                dev->state = RNDIS_DEV_UNINITIALIZED;
@@ -1137,8 +1141,6 @@ int rndis_filter_device_add(struct hv_device *dev,
        net_device->num_chn = 1 +
                init_packet->msg.v5_msg.subchn_comp.num_subchannels;
 
-       vmbus_are_subchannels_present(dev->channel);
-
        ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
 
 out:
index a23319fc78caa04f0c076c0749a45fd8393d3e73..6928448f6b7f1a80f5cfd46eeee5992f3bc24cf7 100644 (file)
@@ -1979,6 +1979,7 @@ static const struct net_device_ops team_netdev_ops = {
        .ndo_change_carrier     = team_change_carrier,
        .ndo_bridge_setlink     = ndo_dflt_netdev_switch_port_bridge_setlink,
        .ndo_bridge_dellink     = ndo_dflt_netdev_switch_port_bridge_dellink,
+       .ndo_features_check     = passthru_features_check,
 };
 
 /***********************
index 4e84236b62ce7ec8668c0a7792af318e088e29e6..a829930dac150e2910629f4d918b3dca3b493e8a 100644 (file)
@@ -749,9 +749,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 {
        struct receive_queue *rq =
                container_of(napi, struct receive_queue, napi);
-       unsigned int r, received = 0;
+       unsigned int r, received;
 
-       received += virtnet_receive(rq, budget - received);
+       received = virtnet_receive(rq, budget);
 
        /* Out of packets? */
        if (received < budget) {
index 280a315de8d6c8bc6eecf4ef08d0c7bac27f3caa..d5cda067115aaaa0f2b00a41c5a3d8c68ad114b3 100644 (file)
@@ -59,6 +59,7 @@ enum bpf_arg_type {
        ARG_PTR_TO_STACK,       /* any pointer to eBPF program stack */
        ARG_CONST_STACK_SIZE,   /* number of bytes accessed from stack */
 
+       ARG_PTR_TO_CTX,         /* pointer to context */
        ARG_ANYTHING,           /* any (initialized) argument is ok */
 };
 
index b11b28a30b9ee78e1f600cc076736a72dc624c72..920e4457ce6eab1541a9322595fe2894559ce16f 100644 (file)
@@ -561,4 +561,71 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
                skb->protocol = htons(ETH_P_802_2);
 }
 
+/**
+ * skb_vlan_tagged - check if skb is vlan tagged.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged, regardless of whether it is hardware
+ * accelerated or not.
+ */
+static inline bool skb_vlan_tagged(const struct sk_buff *skb)
+{
+       if (!skb_vlan_tag_present(skb) &&
+           likely(skb->protocol != htons(ETH_P_8021Q) &&
+                  skb->protocol != htons(ETH_P_8021AD)))
+               return false;
+
+       return true;
+}
+
+/**
+ * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged with multiple vlan headers, regardless
+ * of whether it is hardware accelerated or not.
+ */
+static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
+{
+       __be16 protocol = skb->protocol;
+
+       if (!skb_vlan_tag_present(skb)) {
+               struct vlan_ethhdr *veh;
+
+               if (likely(protocol != htons(ETH_P_8021Q) &&
+                          protocol != htons(ETH_P_8021AD)))
+                       return false;
+
+               veh = (struct vlan_ethhdr *)skb->data;
+               protocol = veh->h_vlan_encapsulated_proto;
+       }
+
+       if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+               return false;
+
+       return true;
+}
+
+/**
+ * vlan_features_check - drop unsafe features for skb with multiple tags.
+ * @skb: skbuff to query
+ * @features: features to be checked
+ *
+ * Returns features without unsafe ones if the skb has multiple tags.
+ */
+static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+                                                   netdev_features_t features)
+{
+       if (skb_vlan_tagged_multi(skb))
+               features = netdev_intersect_features(features,
+                                                    NETIF_F_SG |
+                                                    NETIF_F_HIGHDMA |
+                                                    NETIF_F_FRAGLIST |
+                                                    NETIF_F_GEN_CSUM |
+                                                    NETIF_F_HW_VLAN_CTAG_TX |
+                                                    NETIF_F_HW_VLAN_STAG_TX);
+
+       return features;
+}
+
 #endif /* !(_LINUX_IF_VLAN_H_) */
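A minimal sketch of how a driver folds the new helper into its own .ndo_features_check, mirroring the mlx4 and qlcnic hunks earlier in this series (the foo_ name is illustrative):

	static netdev_features_t foo_features_check(struct sk_buff *skb,
						    struct net_device *dev,
						    netdev_features_t features)
	{
		features = vlan_features_check(skb, features);
		return vxlan_features_check(skb, features);
	}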
index 08c4ab37189fcb25dcc4e076ae8a81792fc8aeeb..967bb4c8caf16aa88d0a2645a3b754fd1f08b1ab 100644 (file)
@@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
                                        struct net_device *dev);
 
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+                                         struct net_device *dev,
+                                         netdev_features_t features);
 netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
index 99f2e49a8a07889b7e4ef0321c779dfaba7c9b13..e23d242d1230ff899f37478bcb3a8b92769129d2 100644 (file)
@@ -88,7 +88,7 @@ struct rhashtable_compare_arg {
 };
 
 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
 typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
                               const void *obj);
 
@@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn(
        const char *ptr = rht_obj(ht, he);
 
        return likely(params.obj_hashfn) ?
-              rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) :
+              rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+                                                           ht->p.key_len,
+                                                      tbl->hash_rnd)) :
               rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
 }
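Under the three-argument signature, the object hash callback now receives its key length from params.key_len (falling back to ht->p.key_len) instead of hard-coding it. A minimal sketch, assuming <linux/jhash.h> and the test_obj layout from the lib/rhashtable.c example below:

	static u32 my_obj_hashfn(const void *data, u32 len, u32 seed)
	{
		const struct test_obj *obj = data;

		return jhash(&obj->value, len, seed);
	}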
 
index 47917e5e1e12ab8d9a69d71b21287ebbc3a87172..fbca63ba8f733fd37fa300bbbe33a300ab05fd49 100644 (file)
@@ -296,6 +296,12 @@ struct dsa_switch_driver {
                                     u32 br_port_mask);
        int     (*port_stp_update)(struct dsa_switch *ds, int port,
                                   u8 state);
+       int     (*fdb_add)(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid);
+       int     (*fdb_del)(struct dsa_switch *ds, int port,
+                          const unsigned char *addr, u16 vid);
+       int     (*fdb_getnext)(struct dsa_switch *ds, int port,
+                              unsigned char *addr, bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
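A switch driver advertises the new FDB operations by filling the corresponding slots in its dsa_switch_driver; a minimal sketch (foo_ names are illustrative, the mv88e6xxx files in this series carry the real implementations):

	static struct dsa_switch_driver foo_switch_driver = {
		/* ... existing probe/setup/port ops ... */
		.fdb_add     = foo_fdb_add,
		.fdb_del     = foo_fdb_del,
		.fdb_getnext = foo_fdb_getnext,
	};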
index d756af559977d0cf1275e3916e302f0455e48b22..b8cd60dcb4e1f7fd1ae8b7efe7e5802a1ddd86ca 100644 (file)
@@ -138,19 +138,12 @@ struct nft_userdata {
 /**
  *     struct nft_set_elem - generic representation of set elements
  *
- *     @cookie: implementation specific element cookie
  *     @key: element key
- *     @data: element data (maps only)
- *     @flags: element flags (end of interval)
- *
- *     The cookie can be used to store a handle to the element for subsequent
- *     removal.
+ *     @priv: element private data and extensions
  */
 struct nft_set_elem {
-       void                    *cookie;
        struct nft_data         key;
-       struct nft_data         data;
-       u32                     flags;
+       void                    *priv;
 };
 
 struct nft_set;
@@ -202,11 +195,15 @@ struct nft_set_estimate {
        enum nft_set_class      class;
 };
 
+struct nft_set_ext;
+
 /**
  *     struct nft_set_ops - nf_tables set operations
  *
  *     @lookup: look up an element within the set
  *     @insert: insert new element into set
+ *     @activate: activate new element in the next generation
+ *     @deactivate: deactivate element in the next generation
  *     @remove: remove element from set
 *     @walk: iterate over all set elements
  *     @privsize: function to return size of set private data
@@ -214,16 +211,19 @@ struct nft_set_estimate {
  *     @destroy: destroy private data of set instance
  *     @list: nf_tables_set_ops list node
  *     @owner: module reference
+ *     @elemsize: element private size
  *     @features: features supported by the implementation
  */
 struct nft_set_ops {
        bool                            (*lookup)(const struct nft_set *set,
                                                  const struct nft_data *key,
-                                                 struct nft_data *data);
-       int                             (*get)(const struct nft_set *set,
-                                              struct nft_set_elem *elem);
+                                                 const struct nft_set_ext **ext);
        int                             (*insert)(const struct nft_set *set,
                                                  const struct nft_set_elem *elem);
+       void                            (*activate)(const struct nft_set *set,
+                                                   const struct nft_set_elem *elem);
+       void *                          (*deactivate)(const struct nft_set *set,
+                                                     const struct nft_set_elem *elem);
        void                            (*remove)(const struct nft_set *set,
                                                  const struct nft_set_elem *elem);
        void                            (*walk)(const struct nft_ctx *ctx,
@@ -241,6 +241,7 @@ struct nft_set_ops {
 
        struct list_head                list;
        struct module                   *owner;
+       unsigned int                    elemsize;
        u32                             features;
 };
 
@@ -259,6 +260,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
  *     @nelems: number of elements
  *     @policy: set parameterization (see enum nft_set_policies)
  *     @ops: set ops
+ *     @pnet: network namespace
  *     @flags: set flags
  *     @klen: key length
  *     @dlen: data length
@@ -275,6 +277,7 @@ struct nft_set {
        u16                             policy;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
+       possible_net_t                  pnet;
        u16                             flags;
        u8                              klen;
        u8                              dlen;
@@ -311,6 +314,121 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
                          struct nft_set_binding *binding);
 
+/**
+ *     enum nft_set_extensions - set extension type IDs
+ *
+ *     @NFT_SET_EXT_KEY: element key
+ *     @NFT_SET_EXT_DATA: mapping data
+ *     @NFT_SET_EXT_FLAGS: element flags
+ *     @NFT_SET_EXT_NUM: number of extension types
+ */
+enum nft_set_extensions {
+       NFT_SET_EXT_KEY,
+       NFT_SET_EXT_DATA,
+       NFT_SET_EXT_FLAGS,
+       NFT_SET_EXT_NUM
+};
+
+/**
+ *     struct nft_set_ext_type - set extension type
+ *
+ *     @len: fixed part length of the extension
+ *     @align: alignment requirements of the extension
+ */
+struct nft_set_ext_type {
+       u8      len;
+       u8      align;
+};
+
+extern const struct nft_set_ext_type nft_set_ext_types[];
+
+/**
+ *     struct nft_set_ext_tmpl - set extension template
+ *
+ *     @len: length of extension area
+ *     @offset: offsets of individual extension types
+ */
+struct nft_set_ext_tmpl {
+       u16     len;
+       u8      offset[NFT_SET_EXT_NUM];
+};
+
+/**
+ *     struct nft_set_ext - set extensions
+ *
+ *     @genmask: generation mask
+ *     @offset: offsets of individual extension types
+ *     @data: beginning of extension data
+ */
+struct nft_set_ext {
+       u8      genmask;
+       u8      offset[NFT_SET_EXT_NUM];
+       char    data[0];
+};
+
+static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
+{
+       memset(tmpl, 0, sizeof(*tmpl));
+       tmpl->len = sizeof(struct nft_set_ext);
+}
+
+static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+                                         unsigned int len)
+{
+       tmpl->len        = ALIGN(tmpl->len, nft_set_ext_types[id].align);
+       BUG_ON(tmpl->len > U8_MAX);
+       tmpl->offset[id] = tmpl->len;
+       tmpl->len       += nft_set_ext_types[id].len + len;
+}
+
+static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+{
+       nft_set_ext_add_length(tmpl, id, 0);
+}
+
+static inline void nft_set_ext_init(struct nft_set_ext *ext,
+                                   const struct nft_set_ext_tmpl *tmpl)
+{
+       memcpy(ext->offset, tmpl->offset, sizeof(ext->offset));
+}
+
+static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+       return !!ext->offset[id];
+}
+
+static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+       return ext && __nft_set_ext_exists(ext, id);
+}
+
+static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id)
+{
+       return (void *)ext + ext->offset[id];
+}
+
+static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_KEY);
+}
+
+static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_DATA);
+}
+
+static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
+}
+
+static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
+                                                  void *elem)
+{
+       return elem + set->ops->elemsize;
+}
+
+void nft_set_elem_destroy(const struct nft_set *set, void *elem);
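A sketch of the intended flow: a backend describes the extensions its elements carry, sizes each allocation from elemsize plus tmpl.len, then fixes up the per-element offsets. Details are illustrative; the nft_hash and nft_rbtree changes in this series are the real users:

	struct nft_set_ext_tmpl tmpl;
	struct nft_set_ext *ext;

	nft_set_ext_prepare(&tmpl);
	nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
	nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);

	/* elem = backend allocation of set->ops->elemsize + tmpl.len bytes */
	ext = nft_set_elem_ext(set, elem);
	nft_set_ext_init(ext, &tmpl);
	memcpy(nft_set_ext_key(ext), &elem_key, sizeof(struct nft_data));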
 
 /**
  *     struct nft_expr_type - nf_tables expression type
@@ -449,7 +567,6 @@ enum nft_chain_flags {
  *
  *     @rules: list of rules in the chain
  *     @list: used internally
- *     @net: net namespace that this chain belongs to
  *     @table: table that this chain belongs to
  *     @handle: chain handle
  *     @use: number of jump references to this chain
@@ -460,7 +577,6 @@ enum nft_chain_flags {
 struct nft_chain {
        struct list_head                rules;
        struct list_head                list;
-       struct net                      *net;
        struct nft_table                *table;
        u64                             handle;
        u32                             use;
@@ -512,6 +628,7 @@ struct nft_stats {
  *     struct nft_base_chain - nf_tables base chain
  *
  *     @ops: netfilter hook ops
+ *     @pnet: net namespace that this chain belongs to
  *     @type: chain type
  *     @policy: default policy
  *     @stats: per-cpu chain stats
@@ -519,6 +636,7 @@ struct nft_stats {
  */
 struct nft_base_chain {
        struct nf_hook_ops              ops[NFT_HOOK_OPS_MAX];
+       possible_net_t                  pnet;
        const struct nf_chain_type      *type;
        u8                              policy;
        struct nft_stats __percpu       *stats;
@@ -605,6 +723,50 @@ void nft_unregister_expr(struct nft_expr_type *);
 #define MODULE_ALIAS_NFT_SET() \
        MODULE_ALIAS("nft-set")
 
+/*
+ * The gencursor defines two generations, the currently active and the
+ * next one. Objects contain a bitmask of 2 bits specifying the generations
+ * they're active in. A set bit means they're inactive in the generation
+ * represented by that bit.
+ *
+ * New objects start out as inactive in the current and active in the
+ * next generation. When committing the ruleset the bitmask is cleared,
+ * meaning they're active in all generations. When removing an object,
+ * it is set inactive in the next generation. After committing the ruleset,
+ * the objects are removed.
+ */
+static inline unsigned int nft_gencursor_next(const struct net *net)
+{
+       return net->nft.gencursor + 1 == 1 ? 1 : 0;
+}
+
+static inline u8 nft_genmask_next(const struct net *net)
+{
+       return 1 << nft_gencursor_next(net);
+}
+
+static inline u8 nft_genmask_cur(const struct net *net)
+{
+       /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
+       return 1 << ACCESS_ONCE(net->nft.gencursor);
+}
+
+/*
+ * Set element transaction helpers
+ */
+
+static inline bool nft_set_elem_active(const struct nft_set_ext *ext,
+                                      u8 genmask)
+{
+       return !(ext->genmask & genmask);
+}
+
+static inline void nft_set_elem_change_active(const struct nft_set *set,
+                                             struct nft_set_ext *ext)
+{
+       ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
+}
+
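A worked example of the two-bit mask with gencursor == 0, i.e. the current-generation bit is 0x1 and the next-generation bit is 0x2; recall that a set bit means inactive in that generation:

	ext->genmask = nft_genmask_cur(net);   /* 0x1: new element, hidden now,  */
					       /* visible after commit           */
	nft_set_elem_active(ext, 0x1);         /* false until commit             */
	ext->genmask = 0;                      /* commit clears the mask         */
	nft_set_elem_change_active(set, ext);  /* removal: genmask ^= 0x2        */
	nft_set_elem_active(ext, 0x2);         /* false in the next generation   */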
 /**
  *     struct nft_trans - nf_tables object update in transaction
  *
index 42690daa924e4db6415856eadc80f3e7117a415d..963303fb96ae227263e648fb0c8dbafdc9cbc945 100644 (file)
@@ -529,8 +529,6 @@ int tcp_write_wakeup(struct sock *);
 void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
-bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb,
-                         const char *proto);
 void tcp_push_one(struct sock *, unsigned int mss_now);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
index 27dc4ec588402e2cf1ee5e285719fc8eb4ea876e..74aab6e0d96436b5d24e19c18e9c8d81dc14eee9 100644 (file)
@@ -168,6 +168,7 @@ enum bpf_func_id {
        BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
        BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
        BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+       BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
        __BPF_FUNC_MAX_ID,
 };
 
index 0e714f799ec075ddaa43c3ff643abaed30ad2ce9..630a7bac1e513e330785283e38aa1dfbe9eb5c50 100644 (file)
@@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
                expected_type = CONST_IMM;
        } else if (arg_type == ARG_CONST_MAP_PTR) {
                expected_type = CONST_PTR_TO_MAP;
+       } else if (arg_type == ARG_PTR_TO_CTX) {
+               expected_type = PTR_TO_CTX;
        } else {
                verbose("unsupported arg_type %d\n", arg_type);
                return -EFAULT;
index 4b7b7e672b934842e9252726d9d1fef6b75f0214..4898442b837fbd715f8d5079a1060e43c4b7a367 100644 (file)
@@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *     struct rhash_head       node;
  * };
  *
- * u32 my_hash_fn(const void *data, u32 seed)
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
  * {
  *     struct test_obj *obj = data;
  *
index f196552ec3c41e13f2891f3a29a43844d56c2574..8b5ab9033b418606ecef53cbb9dcc91669e9d752 100644 (file)
@@ -554,6 +554,7 @@ static int vlan_dev_init(struct net_device *dev)
        if (dev->features & NETIF_F_VLAN_FEATURES)
                netdev_warn(real_dev, "VLAN features are set incorrectly.  Q-in-Q configurations may not work correctly.\n");
 
+       dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
 
        /* ipv6 shared card related stuff */
        dev->dev_id = real_dev->dev_id;
index a0408d497dae04e7caa145f05c915b058aa2d356..3a06003ecafd307acf8b1fae3182e81b5a2ac328 100644 (file)
@@ -2562,12 +2562,26 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
        return features;
 }
 
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+                                         struct net_device *dev,
+                                         netdev_features_t features)
+{
+       return features;
+}
+EXPORT_SYMBOL(passthru_features_check);
+
+static netdev_features_t dflt_features_check(const struct sk_buff *skb,
+                                            struct net_device *dev,
+                                            netdev_features_t features)
+{
+       return vlan_features_check(skb, features);
+}
+
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
        netdev_features_t features = dev->features;
        u16 gso_segs = skb_shinfo(skb)->gso_segs;
-       __be16 protocol = skb->protocol;
 
        if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
                features &= ~NETIF_F_GSO_MASK;
@@ -2579,34 +2593,17 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
        if (skb->encapsulation)
                features &= dev->hw_enc_features;
 
-       if (!skb_vlan_tag_present(skb)) {
-               if (unlikely(protocol == htons(ETH_P_8021Q) ||
-                            protocol == htons(ETH_P_8021AD))) {
-                       struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-                       protocol = veh->h_vlan_encapsulated_proto;
-               } else {
-                       goto finalize;
-               }
-       }
-
-       features = netdev_intersect_features(features,
-                                            dev->vlan_features |
-                                            NETIF_F_HW_VLAN_CTAG_TX |
-                                            NETIF_F_HW_VLAN_STAG_TX);
-
-       if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+       if (skb_vlan_tagged(skb))
                features = netdev_intersect_features(features,
-                                                    NETIF_F_SG |
-                                                    NETIF_F_HIGHDMA |
-                                                    NETIF_F_FRAGLIST |
-                                                    NETIF_F_GEN_CSUM |
+                                                    dev->vlan_features |
                                                     NETIF_F_HW_VLAN_CTAG_TX |
                                                     NETIF_F_HW_VLAN_STAG_TX);
 
-finalize:
        if (dev->netdev_ops->ndo_features_check)
                features &= dev->netdev_ops->ndo_features_check(skb, dev,
                                                                features);
+       else
+               features &= dflt_features_check(skb, dev, features);
 
        return harmonize_features(skb, features);
 }
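With the default check in place, only devices that are genuinely protocol-transparent need to opt out, by pointing .ndo_features_check at the new pass-through helper exactly as the igb and team hunks in this series do:

	static const struct net_device_ops foo_netdev_ops = {
		/* ... */
		.ndo_features_check = passthru_features_check,
	};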
index 32f43c59908c49daacbbb5e2060e057fd3eed422..444a07e4f68d7eca3e1c324ae91991d32779ae4d 100644 (file)
@@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
        return 0;
 }
 
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       unsigned int offset = (unsigned int) r2;
+       void *from = (void *) (long) r3;
+       unsigned int len = (unsigned int) r4;
+       char buf[16];
+       void *ptr;
+
+       /* bpf verifier guarantees that:
+        * 'from' pointer points to bpf program stack
+        * 'len' bytes of it were initialized
+        * 'len' > 0
+        * 'skb' is a valid pointer to 'struct sk_buff'
+        *
+        * so check for invalid 'offset' and too large 'len'
+        */
+       if (offset > 0xffff || len > sizeof(buf))
+               return -EFAULT;
+
+       if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+               return -EFAULT;
+
+       ptr = skb_header_pointer(skb, offset, len, buf);
+       if (unlikely(!ptr))
+               return -EFAULT;
+
+       skb_postpull_rcsum(skb, ptr, len);
+
+       memcpy(ptr, from, len);
+
+       if (ptr == buf)
+               /* skb_store_bits cannot return -EFAULT here */
+               skb_store_bits(skb, offset, ptr, len);
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+       return 0;
+}
+
+const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+       .func           = bpf_skb_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_PTR_TO_STACK,
+       .arg4_type      = ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id)
        }
 }
 
+static const struct bpf_func_proto *
+tc_cls_act_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {
+       case BPF_FUNC_skb_store_bytes:
+               return &bpf_skb_store_bytes_proto;
+       default:
+               return sk_filter_func_proto(func_id);
+       }
+}
+
 static bool sk_filter_is_valid_access(int off, int size,
                                      enum bpf_access_type type)
 {
@@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = {
        .convert_ctx_access = sk_filter_convert_ctx_access,
 };
 
+static const struct bpf_verifier_ops tc_cls_act_ops = {
+       .get_func_proto = tc_cls_act_func_proto,
+       .is_valid_access = sk_filter_is_valid_access,
+       .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
        .ops = &sk_filter_ops,
        .type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
 static struct bpf_prog_type_list sched_cls_type __read_mostly = {
-       .ops = &sk_filter_ops,
+       .ops = &tc_cls_act_ops,
        .type = BPF_PROG_TYPE_SCHED_CLS,
 };
 
 static struct bpf_prog_type_list sched_act_type __read_mostly = {
-       .ops = &sk_filter_ops,
+       .ops = &tc_cls_act_ops,
        .type = BPF_PROG_TYPE_SCHED_ACT,
 };
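
bpf_skb_store_bytes() is the first helper restricted to the tc classifier/action program types: tc_cls_act_func_proto() layers it on top of the socket-filter helper set. Note how the helper stays safe for shared data: it bounces non-linear bytes through a stack buffer via skb_header_pointer(), writes them back with skb_store_bits(), and keeps CHECKSUM_COMPLETE consistent by pulling the old bytes out of skb->csum (skb_postpull_rcsum) before the memcpy() and adding the new ones back afterwards. A hedged sketch of a classifier using the helper, in the restricted-C style of samples/bpf from this era; the fixed offset 15 (the IPv4 TOS byte behind a 14-byte Ethernet header) is purely illustrative:

    #include <uapi/linux/bpf.h>

    /* Helper stub in the samples/bpf convention; four arguments, matching
     * the proto above (no flags argument existed at this point).
     */
    static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len) =
            (void *) BPF_FUNC_skb_store_bytes;

    __attribute__((section("classifier"), used))
    int set_tos(struct __sk_buff *skb)
    {
            char tos = 0x10;

            /* rewrite one byte; the helper fails cleanly on bad offset/len */
            bpf_skb_store_bytes(skb, 15, &tos, sizeof(tos));
            return 0;
    }
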
 
index 39555f3f263bd98a7f2f5919738309d67aebb7a4..3597724ec3d82399fd40b515d27ebada8c6a4cfa 100644 (file)
@@ -201,6 +201,105 @@ out:
        return 0;
 }
 
+static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+                            struct net_device *dev,
+                            const unsigned char *addr, u16 vid, u16 nlm_flags)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+       int ret = -EOPNOTSUPP;
+
+       if (ds->drv->fdb_add)
+               ret = ds->drv->fdb_add(ds, p->port, addr, vid);
+
+       return ret;
+}
+
+static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+                            struct net_device *dev,
+                            const unsigned char *addr, u16 vid)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+       int ret = -EOPNOTSUPP;
+
+       if (ds->drv->fdb_del)
+               ret = ds->drv->fdb_del(ds, p->port, addr, vid);
+
+       return ret;
+}
+
+static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
+                              const unsigned char *addr, u16 vid,
+                              bool is_static,
+                              u32 portid, u32 seq, int type,
+                              unsigned int flags)
+{
+       struct nlmsghdr *nlh;
+       struct ndmsg *ndm;
+
+       nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       ndm = nlmsg_data(nlh);
+       ndm->ndm_family  = AF_BRIDGE;
+       ndm->ndm_pad1    = 0;
+       ndm->ndm_pad2    = 0;
+       ndm->ndm_flags   = NTF_EXT_LEARNED;
+       ndm->ndm_type    = 0;
+       ndm->ndm_ifindex = dev->ifindex;
+       ndm->ndm_state   = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+       if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
+               goto nla_put_failure;
+
+       if (vid && nla_put_u16(skb, NDA_VLAN, vid))
+               goto nla_put_failure;
+
+       nlmsg_end(skb, nlh);
+       return 0;
+
+nla_put_failure:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/* Dump information about entries, in response to GETNEIGH */
+static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+                             struct net_device *dev,
+                             struct net_device *filter_dev, int idx)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+       unsigned char addr[ETH_ALEN] = { 0 };
+       int ret;
+
+       if (!ds->drv->fdb_getnext)
+               return -EOPNOTSUPP;
+
+       for (; ; idx++) {
+               bool is_static;
+
+               ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static);
+               if (ret < 0)
+                       break;
+
+               if (idx < cb->args[0])
+                       continue;
+
+               ret = dsa_slave_fill_info(dev, skb, addr, 0,
+                                         is_static,
+                                         NETLINK_CB(cb->skb).portid,
+                                         cb->nlh->nlmsg_seq,
+                                         RTM_NEWNEIGH, NLM_F_MULTI);
+               if (ret < 0)
+                       break;
+       }
+
+       return idx;
+}
+
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct dsa_slave_priv *p = netdev_priv(dev);
@@ -572,6 +671,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
        .ndo_change_rx_flags    = dsa_slave_change_rx_flags,
        .ndo_set_rx_mode        = dsa_slave_set_rx_mode,
        .ndo_set_mac_address    = dsa_slave_set_mac_address,
+       .ndo_fdb_add            = dsa_slave_fdb_add,
+       .ndo_fdb_del            = dsa_slave_fdb_del,
+       .ndo_fdb_dump           = dsa_slave_fdb_dump,
        .ndo_do_ioctl           = dsa_slave_ioctl,
 };
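
The three new ndo_fdb_* ops only delegate to optional dsa_switch_driver callbacks, so switches that do not implement them keep returning -EOPNOTSUPP. A sketch of the driver side, with hypothetical foo_* names; the callback signatures are taken from the ds->drv->fdb_* calls above:

    static int foo_fdb_add(struct dsa_switch *ds, int port,
                           const unsigned char *addr, u16 vid)
    {
            /* program a static entry into the hardware FDB */
            return 0;
    }

    static int foo_fdb_del(struct dsa_switch *ds, int port,
                           const unsigned char *addr, u16 vid)
    {
            return 0;
    }

    static int foo_fdb_getnext(struct dsa_switch *ds, int port,
                               unsigned char *addr, bool *is_static)
    {
            return -ENOENT;         /* no (more) entries */
    }

    static struct dsa_switch_driver foo_switch_driver = {
            .fdb_add        = foo_fdb_add,
            .fdb_del        = foo_fdb_del,
            .fdb_getnext    = foo_fdb_getnext,
    };
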
 
index d059182c1466fabbc689d0c49715bc441b9c0f72..e7ad950cf9ef9a7f7b4e92ac146e1f92f6c20c19 100644 (file)
@@ -10,8 +10,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
@@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = {
        .type   = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
-                       .level    = 5,
+                       .level    = LOGLEVEL_NOTICE,
                        .logflags = NF_LOG_MASK,
                },
        },
index 75101980eeee197a4f8413bbd7d29f4fd9e4bb74..076aadda04737eb7fa829adc14eb98ea37867655 100644 (file)
@@ -5,8 +5,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
@@ -26,7 +28,7 @@ static struct nf_loginfo default_loginfo = {
        .type   = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
-                       .level    = 5,
+                       .level    = LOGLEVEL_NOTICE,
                        .logflags = NF_LOG_MASK,
                },
        },
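
This hunk and its twins below (ip6_tables.c, nf_log_ipv6.c, nft_log.c) replace bare numbers with the LOGLEVEL_* constants; the added linux/kernel.h includes make those definitions explicitly available. The values are unchanged, assuming the definitions in include/linux/kern_levels.h:

    /* Assumed values (include/linux/kern_levels.h); this is purely a
     * readability change.
     */
    #define LOGLEVEL_WARNING    4   /* previously the literal 4 */
    #define LOGLEVEL_NOTICE     5   /* previously the literal 5 */
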
index 023196f7ec37a616346bbbef54adea72b7f3269e..18b80e8bc5336564560b7897a939bbbb2d83e5ed 100644 (file)
@@ -5987,6 +5987,35 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 }
 EXPORT_SYMBOL(inet_reqsk_alloc);
 
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+                                const struct sk_buff *skb,
+                                const char *proto)
+{
+       const char *msg = "Dropping request";
+       bool want_cookie = false;
+       struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+       if (sysctl_tcp_syncookies) {
+               msg = "Sending cookies";
+               want_cookie = true;
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+       } else
+#endif
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+       lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+       if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+               lopt->synflood_warned = 1;
+               pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
+                       proto, ntohs(tcp_hdr(skb)->dest), msg);
+       }
+       return want_cookie;
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
                     const struct tcp_request_sock_ops *af_ops,
                     struct sock *sk, struct sk_buff *skb)
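
Relocating tcp_syn_flood_action() into tcp_input.c lets it become static: its only remaining caller is tcp_conn_request(), which both the IPv4 and IPv6 paths reach, so the export in tcp_ipv4.c (removed below) is no longer needed. The call site, paraphrased from the tcp_conn_request() of this era (not part of the hunk, so treat it as a sketch):

    if ((sysctl_tcp_syncookies == 2 ||
         inet_csk_reqsk_queue_is_full(sk)) && !isn) {
            want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
            if (!want_cookie)
                    goto drop;
    }
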
index e073517b2cc727afdbde9161ce4305810a7e9bb4..5aababa20a212068d7ef5acf74c85ddb3d99f61f 100644 (file)
@@ -856,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
        kfree(inet_rsk(req)->opt);
 }
 
-/*
- * Return true if a syncookie should be sent
- */
-bool tcp_syn_flood_action(struct sock *sk,
-                        const struct sk_buff *skb,
-                        const char *proto)
-{
-       const char *msg = "Dropping request";
-       bool want_cookie = false;
-       struct listen_sock *lopt;
-
-#ifdef CONFIG_SYN_COOKIES
-       if (sysctl_tcp_syncookies) {
-               msg = "Sending cookies";
-               want_cookie = true;
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
-       } else
-#endif
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
-
-       lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-       if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
-               lopt->synflood_warned = 1;
-               pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
-                       proto, ntohs(tcp_hdr(skb)->dest), msg);
-       }
-       return want_cookie;
-}
-EXPORT_SYMBOL(tcp_syn_flood_action);
 
 #ifdef CONFIG_TCP_MD5SIG
 /*
index 27ca79682efbf681a0ab6073f50f8fa73214028e..273eb26cd6d4326d6fb71430ab9f1a4bf29dfc83 100644 (file)
@@ -299,19 +299,16 @@ static int __net_init fib6_rules_net_init(struct net *net)
        ops = fib_rules_register(&fib6_rules_ops_template, net);
        if (IS_ERR(ops))
                return PTR_ERR(ops);
-       net->ipv6.fib6_rules_ops = ops;
-
 
-       err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
-                                  RT6_TABLE_LOCAL, 0);
+       err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
        if (err)
                goto out_fib6_rules_ops;
 
-       err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
-                                  0x7FFE, RT6_TABLE_MAIN, 0);
+       err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
        if (err)
                goto out_fib6_rules_ops;
 
+       net->ipv6.fib6_rules_ops = ops;
 out:
        return err;
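
The fix is ordering: net->ipv6.fib6_rules_ops is now published only after both default rules were added, so an error while adding them never leaves the per-netns pointer referencing a half-initialized ruleset. The local ops variable is then enough for the error path, which is assumed (paraphrasing the code just past this hunk) to look like:

    out_fib6_rules_ops:
            fib_rules_unregister(ops);
            goto out;
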
 
index bb00c6f2a8855fb72dcc6a1bc5b496e8216d683f..83f59dc3cccc141a7a64261f27795630379c09a1 100644 (file)
@@ -9,7 +9,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
 #include <linux/capability.h>
 #include <linux/in.h>
 #include <linux/skbuff.h>
@@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = {
        .type = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
-                       .level = 4,
+                       .level = LOGLEVEL_WARNING,
                        .logflags = NF_LOG_MASK,
                },
        },
index ddf07e6f59d7de70bcf41ba84880b2c699296db7..8dd869642f45a032fcd0c1303319313392244a13 100644 (file)
@@ -5,8 +5,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
@@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = {
        .type   = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
-                       .level    = 5,
+                       .level    = LOGLEVEL_NOTICE,
                        .logflags = NF_LOG_MASK,
                },
        },
index 971cd7526f4bb9e159c4650a71010e5052b1f4e6..f70e34a68f702ab39c43e27d4b8e8127b49525f6 100644 (file)
@@ -522,7 +522,6 @@ config NFT_NAT
          typical Network Address Translation (NAT) packet transformations.
 
 config NFT_QUEUE
-       depends on NETFILTER_XTABLES
        depends on NETFILTER_NETLINK_QUEUE
        tristate "Netfilter nf_tables queue module"
        help
index 363a39a6c286481c24d30ac28369c004ea0eea01..5604c2df05d1a40f2e812d18d2a72fd895359100 100644 (file)
@@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx)
 static inline bool
 nft_rule_is_active(struct net *net, const struct nft_rule *rule)
 {
-       return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
-       return net->nft.gencursor+1 == 1 ? 1 : 0;
+       return (rule->genmask & nft_genmask_cur(net)) == 0;
 }
 
 static inline int
 nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
 {
-       return (rule->genmask & (1 << gencursor_next(net))) == 0;
+       return (rule->genmask & nft_genmask_next(net)) == 0;
 }
 
 static inline void
 nft_rule_activate_next(struct net *net, struct nft_rule *rule)
 {
        /* Now inactive, will be active in the future */
-       rule->genmask = (1 << net->nft.gencursor);
+       rule->genmask = nft_genmask_cur(net);
 }
 
 static inline void
 nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
 {
-       rule->genmask = (1 << gencursor_next(net));
+       rule->genmask = nft_genmask_next(net);
 }
 
 static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
 {
-       rule->genmask &= ~(1 << gencursor_next(net));
+       rule->genmask &= ~nft_genmask_next(net);
 }
 
 static int
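
The open-coded `1 << gencursor` arithmetic is centralized into genmask helpers (added to include/net/netfilter/nf_tables.h in this series); every rule, and now every set element, carries a small genmask that is tested against the current generation. A sketch of the helpers, assuming they mirror the removed gencursor_next() logic of alternating between bit 0 and bit 1:

    static inline unsigned int nft_gencursor_next(const struct net *net)
    {
            return net->nft.gencursor + 1 == 1 ? 1 : 0;
    }

    static inline u8 nft_genmask_next(const struct net *net)
    {
            return 1 << nft_gencursor_next(net);
    }

    static inline u8 nft_genmask_cur(const struct net *net)
    {
            /* snapshot once; the packet path can race with a commit */
            return 1 << ACCESS_ONCE(net->nft.gencursor);
    }
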
@@ -1354,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
                        rcu_assign_pointer(basechain->stats, stats);
                }
 
+               write_pnet(&basechain->pnet, net);
                basechain->type = type;
                chain = &basechain->chain;
 
@@ -1381,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
        INIT_LIST_HEAD(&chain->rules);
        chain->handle = nf_tables_alloc_handle(table);
-       chain->net = net;
        chain->table = table;
        nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
 
@@ -2695,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
                goto err2;
 
        INIT_LIST_HEAD(&set->bindings);
+       write_pnet(&set->pnet, net);
        set->ops   = ops;
        set->ktype = ktype;
        set->klen  = desc.klen;
@@ -2771,10 +2767,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
                                        const struct nft_set_iter *iter,
                                        const struct nft_set_elem *elem)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
        enum nft_registers dreg;
 
        dreg = nft_type_to_reg(set->dtype);
-       return nft_validate_data_load(ctx, dreg, &elem->data,
+       return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext),
                                      set->dtype == NFT_DATA_VERDICT ?
                                      NFT_DATA_VERDICT : NFT_DATA_VALUE);
 }
@@ -2827,6 +2824,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
                nf_tables_set_destroy(ctx, set);
 }
 
+const struct nft_set_ext_type nft_set_ext_types[] = {
+       [NFT_SET_EXT_KEY]               = {
+               .len    = sizeof(struct nft_data),
+               .align  = __alignof__(struct nft_data),
+       },
+       [NFT_SET_EXT_DATA]              = {
+               .len    = sizeof(struct nft_data),
+               .align  = __alignof__(struct nft_data),
+       },
+       [NFT_SET_EXT_FLAGS]             = {
+               .len    = sizeof(u8),
+               .align  = __alignof__(u8),
+       },
+};
+EXPORT_SYMBOL_GPL(nft_set_ext_types);
+
 /*
  * Set elements
  */
@@ -2873,6 +2886,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
                                  const struct nft_set *set,
                                  const struct nft_set_elem *elem)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
        unsigned char *b = skb_tail_pointer(skb);
        struct nlattr *nest;
 
@@ -2880,20 +2894,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
        if (nest == NULL)
                goto nla_put_failure;
 
-       if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
-                         set->klen) < 0)
+       if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+                         NFT_DATA_VALUE, set->klen) < 0)
                goto nla_put_failure;
 
-       if (set->flags & NFT_SET_MAP &&
-           !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
-           nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+           nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
                          set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
                          set->dlen) < 0)
                goto nla_put_failure;
 
-       if (elem->flags != 0)
-               if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
-                       goto nla_put_failure;
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+           nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+                        htonl(*nft_set_ext_flags(ext))))
+               goto nla_put_failure;
 
        nla_nest_end(skb, nest);
        return 0;
@@ -3114,15 +3128,54 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
        return trans;
 }
 
+static void *nft_set_elem_init(const struct nft_set *set,
+                              const struct nft_set_ext_tmpl *tmpl,
+                              const struct nft_data *key,
+                              const struct nft_data *data,
+                              gfp_t gfp)
+{
+       struct nft_set_ext *ext;
+       void *elem;
+
+       elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+       if (elem == NULL)
+               return NULL;
+
+       ext = nft_set_elem_ext(set, elem);
+       nft_set_ext_init(ext, tmpl);
+
+       memcpy(nft_set_ext_key(ext), key, set->klen);
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+               memcpy(nft_set_ext_data(ext), data, set->dlen);
+
+       return elem;
+}
+
+void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+       nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+               nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+
+       kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                            const struct nlattr *attr)
 {
        struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
        struct nft_data_desc d1, d2;
+       struct nft_set_ext_tmpl tmpl;
+       struct nft_set_ext *ext;
        struct nft_set_elem elem;
        struct nft_set_binding *binding;
+       struct nft_data data;
        enum nft_registers dreg;
        struct nft_trans *trans;
+       u32 flags;
        int err;
 
        if (set->size && set->nelems == set->size)
@@ -3136,22 +3189,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        if (nla[NFTA_SET_ELEM_KEY] == NULL)
                return -EINVAL;
 
-       elem.flags = 0;
+       nft_set_ext_prepare(&tmpl);
+
+       flags = 0;
        if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
-               elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
-               if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+               flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+               if (flags & ~NFT_SET_ELEM_INTERVAL_END)
                        return -EINVAL;
                if (!(set->flags & NFT_SET_INTERVAL) &&
-                   elem.flags & NFT_SET_ELEM_INTERVAL_END)
+                   flags & NFT_SET_ELEM_INTERVAL_END)
                        return -EINVAL;
+               if (flags != 0)
+                       nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
        }
 
        if (set->flags & NFT_SET_MAP) {
                if (nla[NFTA_SET_ELEM_DATA] == NULL &&
-                   !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+                   !(flags & NFT_SET_ELEM_INTERVAL_END))
                        return -EINVAL;
                if (nla[NFTA_SET_ELEM_DATA] != NULL &&
-                   elem.flags & NFT_SET_ELEM_INTERVAL_END)
+                   flags & NFT_SET_ELEM_INTERVAL_END)
                        return -EINVAL;
        } else {
                if (nla[NFTA_SET_ELEM_DATA] != NULL)
@@ -3165,12 +3222,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
                goto err2;
 
-       err = -EEXIST;
-       if (set->ops->get(set, &elem) == 0)
-               goto err2;
+       nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
 
        if (nla[NFTA_SET_ELEM_DATA] != NULL) {
-               err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+               err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
                if (err < 0)
                        goto err2;
 
@@ -3187,29 +3242,43 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                        };
 
                        err = nft_validate_data_load(&bind_ctx, dreg,
-                                                    &elem.data, d2.type);
+                                                    &data, d2.type);
                        if (err < 0)
                                goto err3;
                }
+
+               nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
        }
 
+       err = -ENOMEM;
+       elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+       if (elem.priv == NULL)
+               goto err3;
+
+       ext = nft_set_elem_ext(set, elem.priv);
+       if (flags)
+               *nft_set_ext_flags(ext) = flags;
+
        trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
        if (trans == NULL)
-               goto err3;
+               goto err4;
 
+       ext->genmask = nft_genmask_cur(ctx->net);
        err = set->ops->insert(set, &elem);
        if (err < 0)
-               goto err4;
+               goto err5;
 
        nft_trans_elem(trans) = elem;
        list_add_tail(&trans->list, &ctx->net->nft.commit_list);
        return 0;
 
-err4:
+err5:
        kfree(trans);
+err4:
+       kfree(elem.priv);
 err3:
        if (nla[NFTA_SET_ELEM_DATA] != NULL)
-               nft_data_uninit(&elem.data, d2.type);
+               nft_data_uninit(&data, d2.type);
 err2:
        nft_data_uninit(&elem.key, d1.type);
 err1:
@@ -3282,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
        if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
                goto err2;
 
-       err = set->ops->get(set, &elem);
-       if (err < 0)
-               goto err2;
-
        trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
        if (trans == NULL) {
                err = -ENOMEM;
                goto err2;
        }
 
+       elem.priv = set->ops->deactivate(set, &elem);
+       if (elem.priv == NULL) {
+               err = -ENOENT;
+               goto err3;
+       }
+
        nft_trans_elem(trans) = elem;
        list_add_tail(&trans->list, &ctx->net->nft.commit_list);
        return 0;
+
+err3:
+       kfree(trans);
 err2:
        nft_data_uninit(&elem.key, desc.type);
 err1:
@@ -3532,6 +3606,10 @@ static void nf_tables_commit_release(struct nft_trans *trans)
        case NFT_MSG_DELSET:
                nft_set_destroy(nft_trans_set(trans));
                break;
+       case NFT_MSG_DELSETELEM:
+               nft_set_elem_destroy(nft_trans_elem_set(trans),
+                                    nft_trans_elem(trans).priv);
+               break;
        }
        kfree(trans);
 }
@@ -3546,7 +3624,7 @@ static int nf_tables_commit(struct sk_buff *skb)
        while (++net->nft.base_seq == 0);
 
        /* A new generation has just started */
-       net->nft.gencursor = gencursor_next(net);
+       net->nft.gencursor = nft_gencursor_next(net);
 
        /* Make sure all packets have left the previous generation before
         * purging old rules.
@@ -3617,24 +3695,21 @@ static int nf_tables_commit(struct sk_buff *skb)
                                             NFT_MSG_DELSET, GFP_KERNEL);
                        break;
                case NFT_MSG_NEWSETELEM:
-                       nf_tables_setelem_notify(&trans->ctx,
-                                                nft_trans_elem_set(trans),
-                                                &nft_trans_elem(trans),
+                       te = (struct nft_trans_elem *)trans->data;
+
+                       te->set->ops->activate(te->set, &te->elem);
+                       nf_tables_setelem_notify(&trans->ctx, te->set,
+                                                &te->elem,
                                                 NFT_MSG_NEWSETELEM, 0);
                        nft_trans_destroy(trans);
                        break;
                case NFT_MSG_DELSETELEM:
                        te = (struct nft_trans_elem *)trans->data;
+
                        nf_tables_setelem_notify(&trans->ctx, te->set,
                                                 &te->elem,
                                                 NFT_MSG_DELSETELEM, 0);
-                       te->set->ops->get(te->set, &te->elem);
-                       nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-                       if (te->set->flags & NFT_SET_MAP &&
-                           !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
-                               nft_data_uninit(&te->elem.data, te->set->dtype);
                        te->set->ops->remove(te->set, &te->elem);
-                       nft_trans_destroy(trans);
                        break;
                }
        }
@@ -3666,6 +3741,10 @@ static void nf_tables_abort_release(struct nft_trans *trans)
        case NFT_MSG_NEWSET:
                nft_set_destroy(nft_trans_set(trans));
                break;
+       case NFT_MSG_NEWSETELEM:
+               nft_set_elem_destroy(nft_trans_elem_set(trans),
+                                    nft_trans_elem(trans).priv);
+               break;
        }
        kfree(trans);
 }
@@ -3736,16 +3815,15 @@ static int nf_tables_abort(struct sk_buff *skb)
                case NFT_MSG_NEWSETELEM:
                        nft_trans_elem_set(trans)->nelems--;
                        te = (struct nft_trans_elem *)trans->data;
-                       te->set->ops->get(te->set, &te->elem);
-                       nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-                       if (te->set->flags & NFT_SET_MAP &&
-                           !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
-                               nft_data_uninit(&te->elem.data, te->set->dtype);
+
                        te->set->ops->remove(te->set, &te->elem);
-                       nft_trans_destroy(trans);
                        break;
                case NFT_MSG_DELSETELEM:
+                       te = (struct nft_trans_elem *)trans->data;
+
                        nft_trans_elem_set(trans)->nelems++;
+                       te->set->ops->activate(te->set, &te->elem);
+
                        nft_trans_destroy(trans);
                        break;
                }
@@ -3820,13 +3898,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
                                        const struct nft_set_iter *iter,
                                        const struct nft_set_elem *elem)
 {
-       if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+       const struct nft_data *data;
+
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+           *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
                return 0;
 
-       switch (elem->data.verdict) {
+       data = nft_set_ext_data(ext);
+       switch (data->verdict) {
        case NFT_JUMP:
        case NFT_GOTO:
-               return nf_tables_check_loops(ctx, elem->data.chain);
+               return nf_tables_check_loops(ctx, data->chain);
        default:
                return 0;
        }
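
The pattern running through this whole file: a set element's payload (key, data, flags) no longer lives in fixed struct members but in a variable-layout extension area described by nft_set_ext_types[] and assembled from a template, so each backend only stores what an element actually uses. A condensed sketch of the allocation flow, built solely from calls that appear in the hunks above (the has_data flag stands in for the NFT_SET_MAP checks):

    static void *example_elem_alloc(const struct nft_set *set,
                                    const struct nft_data *key,
                                    const struct nft_data *data,
                                    bool has_data)
    {
            struct nft_set_ext_tmpl tmpl;

            /* describe the layout first ... */
            nft_set_ext_prepare(&tmpl);
            nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
            if (has_data)
                    nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);

            /* ... then allocate one blob: backend element + extensions */
            return nft_set_elem_init(set, &tmpl, key, data, GFP_KERNEL);
    }
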
index 77165bf023f36754ea717603ea5a28ca51c272ca..ef4dfcbaf149f4c207f0096ceb6b8a6c1aa3d924 100644 (file)
@@ -8,6 +8,7 @@
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -37,7 +38,7 @@ static struct nf_loginfo trace_loginfo = {
        .type = NF_LOG_TYPE_LOG,
        .u = {
                .log = {
-                       .level = 4,
+                       .level = LOGLEVEL_WARNING,
                        .logflags = NF_LOG_MASK,
                },
        },
@@ -49,10 +50,10 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt,
 {
        struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
 
-       nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
-                     pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
-                     chain->table->name, chain->name, comments[type],
-                     rulenum);
+       nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+                    pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+                    chain->table->name, chain->name, comments[type],
+                    rulenum);
 }
 
 static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
@@ -112,6 +113,7 @@ unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 {
        const struct nft_chain *chain = ops->priv, *basechain = chain;
+       const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
        const struct nft_rule *rule;
        const struct nft_expr *expr, *last;
        struct nft_data data[NFT_REG_MAX + 1];
@@ -119,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
        struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
        struct nft_stats *stats;
        int rulenum;
-       /*
-        * Cache cursor to avoid problems in case that the cursor is updated
-        * while traversing the ruleset.
-        */
-       unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+       unsigned int gencursor = nft_genmask_cur(net);
 
 do_chain:
        rulenum = 0;
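
Storing the namespace in the base chain as a possible_net_t (write_pnet() in nf_tables_api.c above, read_pnet() here in the hot path) replaces the old chain->net pointer and compiles away entirely when network namespaces are disabled. A sketch of the assumed accessor semantics from include/net/net_namespace.h:

    static void example_pnet(struct net *net)
    {
            possible_net_t pnet;
            struct net *n;

            write_pnet(&pnet, net);         /* no-op without CONFIG_NET_NS */
            n = read_pnet(&pnet);           /* &init_net without CONFIG_NET_NS */
            (void)n;
    }
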
index f9ce2195fd63e30e725fd96088e55cd2d8743160..c7e1a9d7d46f515c9ef80f67d8fffe630ddafb01 100644 (file)
 /* We target a hash table size of 4, element hint is 75% of final size */
 #define NFT_HASH_ELEMENT_HINT 3
 
+struct nft_hash {
+       struct rhashtable               ht;
+};
+
 struct nft_hash_elem {
        struct rhash_head               node;
-       struct nft_data                 key;
-       struct nft_data                 data[];
+       struct nft_set_ext              ext;
+};
+
+struct nft_hash_cmp_arg {
+       const struct nft_set            *set;
+       const struct nft_data           *key;
+       u8                              genmask;
 };
 
 static const struct rhashtable_params nft_hash_params;
 
-static bool nft_hash_lookup(const struct nft_set *set,
-                           const struct nft_data *key,
-                           struct nft_data *data)
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
 {
-       struct rhashtable *priv = nft_set_priv(set);
-       const struct nft_hash_elem *he;
-
-       he = rhashtable_lookup_fast(priv, key, nft_hash_params);
-       if (he && set->flags & NFT_SET_MAP)
-               nft_data_copy(data, he->data);
+       const struct nft_hash_cmp_arg *arg = data;
 
-       return !!he;
+       return jhash(arg->key, len, seed);
 }
 
-static int nft_hash_insert(const struct nft_set *set,
-                          const struct nft_set_elem *elem)
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
 {
-       struct rhashtable *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
-       unsigned int size;
-       int err;
+       const struct nft_hash_elem *he = data;
 
-       if (elem->flags != 0)
-               return -EINVAL;
+       return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
 
-       size = sizeof(*he);
-       if (set->flags & NFT_SET_MAP)
-               size += sizeof(he->data[0]);
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+                              const void *ptr)
+{
+       const struct nft_hash_cmp_arg *x = arg->key;
+       const struct nft_hash_elem *he = ptr;
 
-       he = kzalloc(size, GFP_KERNEL);
-       if (he == NULL)
-               return -ENOMEM;
+       if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+               return 1;
+       if (!nft_set_elem_active(&he->ext, x->genmask))
+               return 1;
+       return 0;
+}
 
-       nft_data_copy(&he->key, &elem->key);
-       if (set->flags & NFT_SET_MAP)
-               nft_data_copy(he->data, &elem->data);
+static bool nft_hash_lookup(const struct nft_set *set,
+                           const struct nft_data *key,
+                           const struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       const struct nft_hash_elem *he;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_cur(read_pnet(&set->pnet)),
+               .set     = set,
+               .key     = key,
+       };
 
-       err = rhashtable_insert_fast(priv, &he->node, nft_hash_params);
-       if (err)
-               kfree(he);
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       if (he != NULL)
+               *ext = &he->ext;
 
-       return err;
+       return !!he;
 }
 
-static void nft_hash_elem_destroy(const struct nft_set *set,
-                                 struct nft_hash_elem *he)
+static int nft_hash_insert(const struct nft_set *set,
+                          const struct nft_set_elem *elem)
 {
-       nft_data_uninit(&he->key, NFT_DATA_VALUE);
-       if (set->flags & NFT_SET_MAP)
-               nft_data_uninit(he->data, set->dtype);
-       kfree(he);
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he = elem->priv;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+               .set     = set,
+               .key     = &elem->key,
+       };
+
+       return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+                                           nft_hash_params);
 }
 
-static void nft_hash_remove(const struct nft_set *set,
-                           const struct nft_set_elem *elem)
+static void nft_hash_activate(const struct nft_set *set,
+                             const struct nft_set_elem *elem)
 {
-       struct rhashtable *priv = nft_set_priv(set);
+       struct nft_hash_elem *he = elem->priv;
 
-       rhashtable_remove_fast(priv, elem->cookie, nft_hash_params);
-       synchronize_rcu();
-       kfree(elem->cookie);
+       nft_set_elem_change_active(set, &he->ext);
 }
 
-static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void *nft_hash_deactivate(const struct nft_set *set,
+                                const struct nft_set_elem *elem)
 {
-       struct rhashtable *priv = nft_set_priv(set);
+       struct nft_hash *priv = nft_set_priv(set);
        struct nft_hash_elem *he;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+               .set     = set,
+               .key     = &elem->key,
+       };
 
-       he = rhashtable_lookup_fast(priv, &elem->key, nft_hash_params);
-       if (!he)
-               return -ENOENT;
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       if (he != NULL)
+               nft_set_elem_change_active(set, &he->ext);
 
-       elem->cookie = he;
-       elem->flags = 0;
-       if (set->flags & NFT_SET_MAP)
-               nft_data_copy(&elem->data, he->data);
+       return he;
+}
 
-       return 0;
+static void nft_hash_remove(const struct nft_set *set,
+                           const struct nft_set_elem *elem)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he = elem->priv;
+
+       rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
 }
 
 static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
                          struct nft_set_iter *iter)
 {
-       struct rhashtable *priv = nft_set_priv(set);
-       const struct nft_hash_elem *he;
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he;
        struct rhashtable_iter hti;
        struct nft_set_elem elem;
+       u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
        int err;
 
-       err = rhashtable_walk_init(priv, &hti);
+       err = rhashtable_walk_init(&priv->ht, &hti);
        iter->err = err;
        if (err)
                return;
@@ -144,11 +170,10 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
+               if (!nft_set_elem_active(&he->ext, genmask))
+                       goto cont;
 
-               memcpy(&elem.key, &he->key, sizeof(elem.key));
-               if (set->flags & NFT_SET_MAP)
-                       memcpy(&elem.data, he->data, sizeof(elem.data));
-               elem.flags = 0;
+               elem.priv = he;
 
                iter->err = iter->fn(ctx, set, iter, &elem);
                if (iter->err < 0)
@@ -165,37 +190,40 @@ out:
 
 static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
 {
-       return sizeof(struct rhashtable);
+       return sizeof(struct nft_hash);
 }
 
 static const struct rhashtable_params nft_hash_params = {
-       .head_offset = offsetof(struct nft_hash_elem, node),
-       .key_offset = offsetof(struct nft_hash_elem, key),
-       .hashfn = jhash,
-       .automatic_shrinking = true,
+       .head_offset            = offsetof(struct nft_hash_elem, node),
+       .hashfn                 = nft_hash_key,
+       .obj_hashfn             = nft_hash_obj,
+       .obj_cmpfn              = nft_hash_cmp,
+       .automatic_shrinking    = true,
 };
 
 static int nft_hash_init(const struct nft_set *set,
                         const struct nft_set_desc *desc,
                         const struct nlattr * const tb[])
 {
-       struct rhashtable *priv = nft_set_priv(set);
+       struct nft_hash *priv = nft_set_priv(set);
        struct rhashtable_params params = nft_hash_params;
 
        params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
-       params.key_len = set->klen;
+       params.key_len    = set->klen;
 
-       return rhashtable_init(priv, &params);
+       return rhashtable_init(&priv->ht, &params);
 }
 
-static void nft_free_element(void *ptr, void *arg)
+static void nft_hash_elem_destroy(void *ptr, void *arg)
 {
-       nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+       nft_set_elem_destroy((const struct nft_set *)arg, ptr);
 }
 
 static void nft_hash_destroy(const struct nft_set *set)
 {
-       rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+       struct nft_hash *priv = nft_set_priv(set);
+
+       rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
                                    (void *)set);
 }
 
@@ -205,11 +233,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
        unsigned int esize;
 
        esize = sizeof(struct nft_hash_elem);
-       if (features & NFT_SET_MAP)
-               esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
-
        if (desc->size) {
-               est->size = sizeof(struct rhashtable) +
+               est->size = sizeof(struct nft_hash) +
                            roundup_pow_of_two(desc->size * 4 / 3) *
                            sizeof(struct nft_hash_elem *) +
                            desc->size * esize;
@@ -229,11 +254,13 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 
 static struct nft_set_ops nft_hash_ops __read_mostly = {
        .privsize       = nft_hash_privsize,
+       .elemsize       = offsetof(struct nft_hash_elem, ext),
        .estimate       = nft_hash_estimate,
        .init           = nft_hash_init,
        .destroy        = nft_hash_destroy,
-       .get            = nft_hash_get,
        .insert         = nft_hash_insert,
+       .activate       = nft_hash_activate,
+       .deactivate     = nft_hash_deactivate,
        .remove         = nft_hash_remove,
        .lookup         = nft_hash_lookup,
        .walk           = nft_hash_walk,
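
nft_hash now uses the three-callback flavour of rhashtable: hashfn hashes the search key, obj_hashfn hashes a stored element, and obj_cmpfn (returning 0 on a match) lets the lookup key be a composite. Here the composite, struct nft_hash_cmp_arg, folds the generation mask into the comparison so that elements from the wrong generation are simply invisible. A sketch of a lookup against such a table, reusing only the types and params defined above:

    /* rhashtable_lookup_fast() hashes 'arg' with nft_hash_key() and
     * matches candidates via nft_hash_cmp(), so dead-generation
     * elements never match.
     */
    static struct nft_hash_elem *
    example_lookup(struct nft_hash *priv, const struct nft_set *set,
                   const struct nft_data *key, u8 genmask)
    {
            struct nft_hash_cmp_arg arg = {
                    .genmask = genmask,
                    .set     = set,
                    .key     = key,
            };

            return rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
    }
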
index bde05f28cf14782b3b9f11905fbed37ea8644b95..e18af9db2f04ec0a58d4ed563af13f7d253b8f0a 100644 (file)
@@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
                        li->u.log.level =
                                ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
                } else {
-                       li->u.log.level = 4;
+                       li->u.log.level = LOGLEVEL_WARNING;
                }
                if (tb[NFTA_LOG_FLAGS] != NULL) {
                        li->u.log.logflags =
index 9615b8b9fb37dcf769207537f0545dd2a08c62d6..a5f30b8760eab5aa476f0afc13fe0e8686c9ff47 100644 (file)
@@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr,
 {
        const struct nft_lookup *priv = nft_expr_priv(expr);
        const struct nft_set *set = priv->set;
+       const struct nft_set_ext *ext;
 
-       if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+       if (set->ops->lookup(set, &data[priv->sreg], &ext)) {
+               if (set->flags & NFT_SET_MAP)
+                       nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext));
                return;
+       }
        data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
 
index abe68119a76c5cf7594de49fb7d7d7cbd894c312..5197874372ec4a2055a3f9251f3a3ec248f53fbb 100644 (file)
@@ -153,7 +153,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                }
                break;
        case NFT_META_CPU:
-               dest->data[0] = smp_processor_id();
+               dest->data[0] = raw_smp_processor_id();
                break;
        case NFT_META_IIFGROUP:
                if (in == NULL)
index 2c75361077f7e5b903b851f2df09873892f37f7e..42d0ca45fb9e9e64daf3799aad420f205c5586e0 100644 (file)
@@ -26,18 +26,18 @@ struct nft_rbtree {
 
 struct nft_rbtree_elem {
        struct rb_node          node;
-       u16                     flags;
-       struct nft_data         key;
-       struct nft_data         data[];
+       struct nft_set_ext      ext;
 };
 
+
 static bool nft_rbtree_lookup(const struct nft_set *set,
                              const struct nft_data *key,
-                             struct nft_data *data)
+                             const struct nft_set_ext **ext)
 {
        const struct nft_rbtree *priv = nft_set_priv(set);
        const struct nft_rbtree_elem *rbe, *interval = NULL;
        const struct rb_node *parent;
+       u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
        int d;
 
        spin_lock_bh(&nft_rbtree_lock);
@@ -45,7 +45,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
        while (parent != NULL) {
                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-               d = nft_data_cmp(&rbe->key, key, set->klen);
+               d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen);
                if (d < 0) {
                        parent = parent->rb_left;
                        interval = rbe;
@@ -53,12 +53,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
                        parent = parent->rb_right;
                else {
 found:
-                       if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = parent->rb_left;
+                               continue;
+                       }
+                       if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+                           *nft_set_ext_flags(&rbe->ext) &
+                           NFT_SET_ELEM_INTERVAL_END)
                                goto out;
-                       if (set->flags & NFT_SET_MAP)
-                               nft_data_copy(data, rbe->data);
-
                        spin_unlock_bh(&nft_rbtree_lock);
+
+                       *ext = &rbe->ext;
                        return true;
                }
        }
@@ -72,23 +77,13 @@ out:
        return false;
 }
 
-static void nft_rbtree_elem_destroy(const struct nft_set *set,
-                                   struct nft_rbtree_elem *rbe)
-{
-       nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
-       if (set->flags & NFT_SET_MAP &&
-           !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-               nft_data_uninit(rbe->data, set->dtype);
-
-       kfree(rbe);
-}
-
 static int __nft_rbtree_insert(const struct nft_set *set,
                               struct nft_rbtree_elem *new)
 {
        struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe;
        struct rb_node *parent, **p;
+       u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
        int d;
 
        parent = NULL;
@@ -96,13 +91,18 @@ static int __nft_rbtree_insert(const struct nft_set *set,
        while (*p != NULL) {
                parent = *p;
                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-               d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+               d = nft_data_cmp(nft_set_ext_key(&rbe->ext),
+                                nft_set_ext_key(&new->ext),
+                                set->klen);
                if (d < 0)
                        p = &parent->rb_left;
                else if (d > 0)
                        p = &parent->rb_right;
-               else
-                       return -EEXIST;
+               else {
+                       if (nft_set_elem_active(&rbe->ext, genmask))
+                               return -EEXIST;
+                       p = &parent->rb_left;
+               }
        }
        rb_link_node(&new->node, parent, p);
        rb_insert_color(&new->node, &priv->root);
@@ -112,31 +112,13 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 static int nft_rbtree_insert(const struct nft_set *set,
                             const struct nft_set_elem *elem)
 {
-       struct nft_rbtree_elem *rbe;
-       unsigned int size;
+       struct nft_rbtree_elem *rbe = elem->priv;
        int err;
 
-       size = sizeof(*rbe);
-       if (set->flags & NFT_SET_MAP &&
-           !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
-               size += sizeof(rbe->data[0]);
-
-       rbe = kzalloc(size, GFP_KERNEL);
-       if (rbe == NULL)
-               return -ENOMEM;
-
-       rbe->flags = elem->flags;
-       nft_data_copy(&rbe->key, &elem->key);
-       if (set->flags & NFT_SET_MAP &&
-           !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-               nft_data_copy(rbe->data, &elem->data);
-
        spin_lock_bh(&nft_rbtree_lock);
        err = __nft_rbtree_insert(set, rbe);
-       if (err < 0)
-               kfree(rbe);
-
        spin_unlock_bh(&nft_rbtree_lock);
+
        return err;
 }
 
@@ -144,39 +126,49 @@ static void nft_rbtree_remove(const struct nft_set *set,
                              const struct nft_set_elem *elem)
 {
        struct nft_rbtree *priv = nft_set_priv(set);
-       struct nft_rbtree_elem *rbe = elem->cookie;
+       struct nft_rbtree_elem *rbe = elem->priv;
 
        spin_lock_bh(&nft_rbtree_lock);
        rb_erase(&rbe->node, &priv->root);
        spin_unlock_bh(&nft_rbtree_lock);
-       kfree(rbe);
 }
 
-static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_rbtree_activate(const struct nft_set *set,
+                               const struct nft_set_elem *elem)
+{
+       struct nft_rbtree_elem *rbe = elem->priv;
+
+       nft_set_elem_change_active(set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct nft_set *set,
+                                  const struct nft_set_elem *elem)
 {
        const struct nft_rbtree *priv = nft_set_priv(set);
        const struct rb_node *parent = priv->root.rb_node;
        struct nft_rbtree_elem *rbe;
+       u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
        int d;
 
        while (parent != NULL) {
                rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-               d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+               d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key,
+                                set->klen);
                if (d < 0)
                        parent = parent->rb_left;
                else if (d > 0)
                        parent = parent->rb_right;
                else {
-                       elem->cookie = rbe;
-                       if (set->flags & NFT_SET_MAP &&
-                           !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-                               nft_data_copy(&elem->data, rbe->data);
-                       elem->flags = rbe->flags;
-                       return 0;
+                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = parent->rb_left;
+                               continue;
+                       }
+                       nft_set_elem_change_active(set, &rbe->ext);
+                       return rbe;
                }
        }
-       return -ENOENT;
+       return NULL;
 }
 
 static void nft_rbtree_walk(const struct nft_ctx *ctx,
@@ -184,21 +176,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
                            struct nft_set_iter *iter)
 {
        const struct nft_rbtree *priv = nft_set_priv(set);
-       const struct nft_rbtree_elem *rbe;
+       struct nft_rbtree_elem *rbe;
        struct nft_set_elem elem;
        struct rb_node *node;
+       u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 
        spin_lock_bh(&nft_rbtree_lock);
        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
                if (iter->count < iter->skip)
                        goto cont;
+               if (!nft_set_elem_active(&rbe->ext, genmask))
+                       goto cont;
 
-               rbe = rb_entry(node, struct nft_rbtree_elem, node);
-               nft_data_copy(&elem.key, &rbe->key);
-               if (set->flags & NFT_SET_MAP &&
-                   !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-                       nft_data_copy(&elem.data, rbe->data);
-               elem.flags = rbe->flags;
+               elem.priv = rbe;
 
                iter->err = iter->fn(ctx, set, iter, &elem);
                if (iter->err < 0) {
@@ -235,7 +227,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
        while ((node = priv->root.rb_node) != NULL) {
                rb_erase(node, &priv->root);
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
-               nft_rbtree_elem_destroy(set, rbe);
+               nft_set_elem_destroy(set, rbe);
        }
 }
 
@@ -245,9 +237,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
        unsigned int nsize;
 
        nsize = sizeof(struct nft_rbtree_elem);
-       if (features & NFT_SET_MAP)
-               nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
-
        if (desc->size)
                est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
        else
@@ -260,12 +249,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 
 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
        .privsize       = nft_rbtree_privsize,
+       .elemsize       = offsetof(struct nft_rbtree_elem, ext),
        .estimate       = nft_rbtree_estimate,
        .init           = nft_rbtree_init,
        .destroy        = nft_rbtree_destroy,
        .insert         = nft_rbtree_insert,
        .remove         = nft_rbtree_remove,
-       .get            = nft_rbtree_get,
+       .deactivate     = nft_rbtree_deactivate,
+       .activate       = nft_rbtree_activate,
        .lookup         = nft_rbtree_lookup,
        .walk           = nft_rbtree_walk,
        .features       = NFT_SET_INTERVAL | NFT_SET_MAP,
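
Both set backends now split deletion in two: deactivate() marks the element dead in the next generation while the transaction is being built, remove() unlinks it only at commit time, and activate() undoes a deactivation on abort. A sketch of the flip helper both backends call, assuming it toggles the next-generation bit as the ops above imply:

    static inline void
    nft_set_elem_change_active(const struct nft_set *set,
                               struct nft_set_ext *ext)
    {
            /* deactivate and (on abort) re-activate are the same XOR */
            ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
    }
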
index 4caa809dbbe0332ca32b7b0279e61289b01574e3..19909d0786a2e60574ba623d343193ca212ed2be 100644 (file)
@@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
        .exit = netlink_net_exit,
 };
 
-static inline u32 netlink_hash(const void *data, u32 seed)
+static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
 {
        const struct netlink_sock *nlk = data;
        struct netlink_compare_arg arg;
index 79355531c3e25bddeeed19934533cf9786a75e83..ae558dd7f8eec3f24eca4c301c36ad8da07389df 100644 (file)
@@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net)
 static void tipc_bclink_unlock(struct net *net)
 {
        struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct tipc_node *node = NULL;
 
-       if (likely(!tn->bclink->flags)) {
-               spin_unlock_bh(&tn->bclink->lock);
-               return;
-       }
-
-       if (tn->bclink->flags & TIPC_BCLINK_RESET) {
-               tn->bclink->flags &= ~TIPC_BCLINK_RESET;
-               node = tipc_bclink_retransmit_to(net);
-       }
        spin_unlock_bh(&tn->bclink->lock);
-
-       if (node)
-               tipc_link_reset_all(node);
 }
 
 void tipc_bclink_input(struct net *net)
@@ -91,13 +78,6 @@ uint  tipc_bclink_get_mtu(void)
        return MAX_PKT_DEFAULT_MCAST;
 }
 
-void tipc_bclink_set_flags(struct net *net, unsigned int flags)
-{
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-
-       tn->bclink->flags |= flags;
-}
-
 static u32 bcbuf_acks(struct sk_buff *buf)
 {
        return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
@@ -156,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
                                                seqno : node->bclink.last_sent;
 }
 
-
 /**
  * tipc_bclink_retransmit_to - get most recent node to request retransmission
  *
@@ -350,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg)
                return;
 
        tipc_node_lock(n_ptr);
-
        if (n_ptr->bclink.recv_permitted &&
            (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
            (n_ptr->bclink.last_in == msg_bcgap_after(msg)))
                n_ptr->bclink.oos_state = 2;
-
        tipc_node_unlock(n_ptr);
+       tipc_node_put(n_ptr);
 }
 
 /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster
@@ -476,17 +454,18 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
                        goto unlock;
                if (msg_destnode(msg) == tn->own_addr) {
                        tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
-                       tipc_node_unlock(node);
                        tipc_bclink_lock(net);
                        bcl->stats.recv_nacks++;
                        tn->bclink->retransmit_to = node;
                        bclink_retransmit_pkt(tn, msg_bcgap_after(msg),
                                              msg_bcgap_to(msg));
                        tipc_bclink_unlock(net);
+                       tipc_node_unlock(node);
                } else {
                        tipc_node_unlock(node);
                        bclink_peek_nack(net, msg);
                }
+               tipc_node_put(node);
                goto exit;
        }
 
@@ -591,6 +570,7 @@ receive:
 
 unlock:
        tipc_node_unlock(node);
+       tipc_node_put(node);
 exit:
        kfree_skb(buf);
 }
index 43f397fbac55908c6312c96e3d71f8dd0223d414..4bdc12277d33ff8eb382b755dd1323424d9d43a2 100644 (file)
@@ -55,7 +55,6 @@ struct tipc_bcbearer_pair {
        struct tipc_bearer *secondary;
 };
 
-#define TIPC_BCLINK_RESET      1
 #define        BCBEARER                MAX_BEARERS
 
 /**
@@ -86,7 +85,6 @@ struct tipc_bcbearer {
  * @lock: spinlock governing access to structure
  * @link: (non-standard) broadcast link structure
  * @node: (non-standard) node structure representing b'cast link's peer node
- * @flags: represent bclink states
  * @bcast_nodes: map of broadcast-capable nodes
  * @retransmit_to: node that most recently requested a retransmit
  *
@@ -96,7 +94,6 @@ struct tipc_bclink {
        spinlock_t lock;
        struct tipc_link link;
        struct tipc_node node;
-       unsigned int flags;
        struct sk_buff_head arrvq;
        struct sk_buff_head inputq;
        struct tipc_node_map bcast_nodes;
@@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
 
 int tipc_bclink_init(struct net *net);
 void tipc_bclink_stop(struct net *net);
-void tipc_bclink_set_flags(struct net *tn, unsigned int flags);
 void tipc_bclink_add_node(struct net *net, u32 addr);
 void tipc_bclink_remove_node(struct net *net, u32 addr);
 struct tipc_node *tipc_bclink_retransmit_to(struct net *tn);
index 169f3dd038b983c40ff9219dcec7d25d256ee6d3..967e292f53c89182bc0ed128b1dacd51fe02d090 100644 (file)
@@ -260,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
                }
        }
        tipc_node_unlock(node);
+       tipc_node_put(node);
 }
 
 /**
index 1287161e9424a854ab18e8442fdf74528ac7cec7..514466efc25cd64961c64d39bba2fce6801ff757 100644 (file)
@@ -854,6 +854,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
                if (link)
                        rc = __tipc_link_xmit(net, link, list);
                tipc_node_unlock(node);
+               tipc_node_put(node);
        }
        if (link)
                return rc;
@@ -980,7 +981,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
                        (unsigned long) TIPC_SKB_CB(buf)->handle);
 
                n_ptr = tipc_bclink_retransmit_to(net);
-               tipc_node_lock(n_ptr);
 
                tipc_addr_string_fill(addr_string, n_ptr->addr);
                pr_info("Broadcast link info for %s\n", addr_string);
@@ -992,9 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
                        n_ptr->bclink.oos_state,
                        n_ptr->bclink.last_sent);
 
-               tipc_node_unlock(n_ptr);
-
-               tipc_bclink_set_flags(net, TIPC_BCLINK_RESET);
+               n_ptr->action_flags |= TIPC_BCAST_RESET;
                l_ptr->stale_count = 0;
        }
 }
@@ -1119,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
                n_ptr = tipc_node_find(net, msg_prevnode(msg));
                if (unlikely(!n_ptr))
                        goto discard;
-               tipc_node_lock(n_ptr);
 
+               tipc_node_lock(n_ptr);
                /* Locate unicast link endpoint that should handle message */
                l_ptr = n_ptr->links[b_ptr->identity];
                if (unlikely(!l_ptr))
@@ -1208,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
                skb = NULL;
 unlock:
                tipc_node_unlock(n_ptr);
+               tipc_node_put(n_ptr);
 discard:
                if (unlikely(skb))
                        kfree_skb(skb);
@@ -2239,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
        msg.seq = cb->nlh->nlmsg_seq;
 
        rcu_read_lock();
-
        if (prev_node) {
                node = tipc_node_find(net, prev_node);
                if (!node) {
@@ -2252,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
                        cb->prev_seq = 1;
                        goto out;
                }
+               tipc_node_put(node);
 
                list_for_each_entry_continue_rcu(node, &tn->node_list,
                                                 list) {
@@ -2259,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb)
                        err = __tipc_nl_add_node_links(net, &msg, node,
                                                       &prev_link);
                        tipc_node_unlock(node);
+                       tipc_node_put(node);
                        if (err)
                                goto out;
 
index 506aaa565da7f39abd30b8171760db67c55a4362..41e7b7e4dda0818469c17ff8b6e48aa1654a23ff 100644 (file)
@@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ,
        tipc_node_lock(node);
        list_add_tail(&publ->nodesub_list, &node->publ_list);
        tipc_node_unlock(node);
+       tipc_node_put(node);
 }
 
 static void tipc_publ_unsubscribe(struct net *net, struct publication *publ,
@@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ,
        tipc_node_lock(node);
        list_del_init(&publ->nodesub_list);
        tipc_node_unlock(node);
+       tipc_node_put(node);
 }
 
 /**
index 26d1de1bf34d7b6b3254feae92fdc01068e75b3a..3e4f04897c03ea3d9e98d339aa0b5da72013b8bc 100644 (file)
@@ -42,6 +42,7 @@
 
 static void node_lost_contact(struct tipc_node *n_ptr);
 static void node_established_contact(struct tipc_node *n_ptr);
+static void tipc_node_delete(struct tipc_node *node);
 
 struct tipc_sock_conn {
        u32 port;
@@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr)
        return addr & (NODE_HTABLE_SIZE - 1);
 }
 
+static void tipc_node_kref_release(struct kref *kref)
+{
+       struct tipc_node *node = container_of(kref, struct tipc_node, kref);
+
+       tipc_node_delete(node);
+}
+
+void tipc_node_put(struct tipc_node *node)
+{
+       kref_put(&node->kref, tipc_node_kref_release);
+}
+
+static void tipc_node_get(struct tipc_node *node)
+{
+       kref_get(&node->kref);
+}
+
 /*
  * tipc_node_find - locate specified node object, if it exists
  */
@@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr)
        hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
                                 hash) {
                if (node->addr == addr) {
+                       tipc_node_get(node);
                        rcu_read_unlock();
                        return node;
                }
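
From this point on, tipc_node_find() hands back a counted reference taken under RCU, so every successful lookup must be balanced by tipc_node_put(). The discipline the remaining hunks apply, reduced to a minimal sketch:

    struct tipc_node *n = tipc_node_find(net, addr);

    if (!n)
            return;                 /* lookup failed, no reference taken */
    tipc_node_lock(n);
    /* ... read or modify the node under its spinlock ... */
    tipc_node_unlock(n);
    tipc_node_put(n);               /* drop the reference from the lookup */
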
@@ -106,6 +125,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
        }
        n_ptr->addr = addr;
        n_ptr->net = net;
+       kref_init(&n_ptr->kref);
        spin_lock_init(&n_ptr->lock);
        INIT_HLIST_NODE(&n_ptr->hash);
        INIT_LIST_HEAD(&n_ptr->list);
@@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
        list_add_tail_rcu(&n_ptr->list, &temp_node->list);
        n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
        n_ptr->signature = INVALID_NODE_SIG;
+       tipc_node_get(n_ptr);
 exit:
        spin_unlock_bh(&tn->node_list_lock);
        return n_ptr;
 }
 
-static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr)
+static void tipc_node_delete(struct tipc_node *node)
 {
-       list_del_rcu(&n_ptr->list);
-       hlist_del_rcu(&n_ptr->hash);
-       kfree_rcu(n_ptr, rcu);
+       list_del_rcu(&node->list);
+       hlist_del_rcu(&node->hash);
+       kfree_rcu(node, rcu);
 }
 
 void tipc_node_stop(struct net *net)
@@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net)
 
        spin_lock_bh(&tn->node_list_lock);
        list_for_each_entry_safe(node, t_node, &tn->node_list, list)
-               tipc_node_delete(tn, node);
+               tipc_node_put(node);
        spin_unlock_bh(&tn->node_list_lock);
 }
 
@@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
 {
        struct tipc_node *node;
        struct tipc_sock_conn *conn;
+       int err = 0;
 
        if (in_own_node(net, dnode))
                return 0;
@@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
                return -EHOSTUNREACH;
        }
        conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
-       if (!conn)
-               return -EHOSTUNREACH;
+       if (!conn) {
+               err = -EHOSTUNREACH;
+               goto exit;
+       }
        conn->peer_node = dnode;
        conn->port = port;
        conn->peer_port = peer_port;
@@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
        tipc_node_lock(node);
        list_add_tail(&conn->list, &node->conn_sks);
        tipc_node_unlock(node);
-       return 0;
+exit:
+       tipc_node_put(node);
+       return err;
 }
 
 void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
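
Because the lookup now pins the node, the allocation-failure branch above may no longer return directly; all paths funnel through one exit label so tipc_node_put() runs on success and failure alike. The same shape as a skeleton (op() and do_setup() are hypothetical stand-ins):

    static int op(struct net *net, u32 dnode)
    {
            struct tipc_node *node;
            int err = 0;

            node = tipc_node_find(net, dnode);
            if (!node)
                    return -EHOSTUNREACH;   /* nothing to release yet */
            if (do_setup(node) < 0) {
                    err = -EHOSTUNREACH;
                    goto exit;
            }
    exit:
            tipc_node_put(node);            /* runs on every path */
            return err;
    }
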
@@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
                kfree(conn);
        }
        tipc_node_unlock(node);
+       tipc_node_put(node);
 }
 
 /**
@@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
                           char *linkname, size_t len)
 {
        struct tipc_link *link;
+       int err = -EINVAL;
        struct tipc_node *node = tipc_node_find(net, addr);
 
-       if ((bearer_id >= MAX_BEARERS) || !node)
-               return -EINVAL;
+       if (!node)
+               return err;
+
+       if (bearer_id >= MAX_BEARERS)
+               goto exit;
+
        tipc_node_lock(node);
        link = node->links[bearer_id];
        if (link) {
                strncpy(linkname, link->name, len);
-               tipc_node_unlock(node);
-               return 0;
+               err = 0;
        }
+exit:
        tipc_node_unlock(node);
-       return -EINVAL;
+       tipc_node_put(node);
+       return err;
 }
 
 void tipc_node_unlock(struct tipc_node *node)
@@ -459,7 +492,7 @@ void tipc_node_unlock(struct tipc_node *node)
                                TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
                                TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
                                TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
-                               TIPC_NAMED_MSG_EVT);
+                               TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET);
 
        spin_unlock_bh(&node->lock);
 
@@ -488,6 +521,9 @@ void tipc_node_unlock(struct tipc_node *node)
 
        if (flags & TIPC_BCAST_MSG_EVT)
                tipc_bclink_input(net);
+
+       if (flags & TIPC_BCAST_RESET)
+               tipc_link_reset_all(node);
 }
 
 /* Caller should hold node lock for the passed node */
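
tipc_link_reset_all() takes the node lock itself, so the retransmit-failure path cannot call it while spinlocks are held; instead it latches TIPC_BCAST_RESET in node->action_flags and tipc_node_unlock() replays the request once the node spinlock is released, replacing the old bclink->flags side channel. The latch-and-replay idiom in isolation:

    /* producer, with node->lock held: record the work, don't do it */
    n_ptr->action_flags |= TIPC_BCAST_RESET;

    /* consumer, in tipc_node_unlock(), after spin_unlock_bh(&node->lock) */
    if (flags & TIPC_BCAST_RESET)
            tipc_link_reset_all(node);      /* safe: node->lock not held */
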
@@ -542,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
        msg.seq = cb->nlh->nlmsg_seq;
 
        rcu_read_lock();
-
-       if (last_addr && !tipc_node_find(net, last_addr)) {
-               rcu_read_unlock();
-               /* We never set seq or call nl_dump_check_consistent() this
-                * means that setting prev_seq here will cause the consistence
-                * check to fail in the netlink callback handler. Resulting in
-                * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if
-                * the node state changed while we released the lock.
-                */
-               cb->prev_seq = 1;
-               return -EPIPE;
+       if (last_addr) {
+               node = tipc_node_find(net, last_addr);
+               if (!node) {
+                       rcu_read_unlock();
+                       /* We never set seq or call nl_dump_check_consistent(),
+                        * which means that setting prev_seq here will cause the
+                        * consistency check to fail in the netlink callback
+                        * handler, resulting in the NLMSG_DONE message having
+                        * the NLM_F_DUMP_INTR flag set if the node state
+                        * changed while we released the lock.
+                        */
+                       cb->prev_seq = 1;
+                       return -EPIPE;
+               }
+               tipc_node_put(node);
        }
 
        list_for_each_entry_rcu(node, &tn->node_list, list) {
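
The restart check keeps its old behaviour, only with the reference dropped immediately: a vanished resume point is signalled by forcing a prev_seq mismatch, which makes netlink attach NLM_F_DUMP_INTR to the closing NLMSG_DONE. The signalling idiom, condensed:

    /* seq was never set via nl_dump_check_consistent(), so any nonzero
     * prev_seq fails the consistency check and the dump is flagged as
     * interrupted, telling userspace to restart it. */
    cb->prev_seq = 1;
    return -EPIPE;
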
index e89ac04ec2c3e3420398b8fa0478b815eccf92ee..02d5c20dc5511a1669c0262e17d1127a5b2b6a1e 100644 (file)
@@ -64,7 +64,8 @@ enum {
        TIPC_NOTIFY_LINK_UP             = (1 << 6),
        TIPC_NOTIFY_LINK_DOWN           = (1 << 7),
        TIPC_NAMED_MSG_EVT              = (1 << 8),
-       TIPC_BCAST_MSG_EVT              = (1 << 9)
+       TIPC_BCAST_MSG_EVT              = (1 << 9),
+       TIPC_BCAST_RESET                = (1 << 10)
 };
 
 /**
@@ -93,6 +94,7 @@ struct tipc_node_bclink {
 /**
  * struct tipc_node - TIPC node structure
  * @addr: network address of node
+ * @kref: reference counter to node object
  * @lock: spinlock governing access to structure
  * @net: the applicable net namespace
  * @hash: links to adjacent nodes in unsorted hash chain
@@ -114,6 +116,7 @@ struct tipc_node_bclink {
  */
 struct tipc_node {
        u32 addr;
+       struct kref kref;
        spinlock_t lock;
        struct net *net;
        struct hlist_node hash;
@@ -136,6 +139,7 @@ struct tipc_node {
 };
 
 struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+void tipc_node_put(struct tipc_node *node);
 struct tipc_node *tipc_node_create(struct net *net, u32 addr);
 void tipc_node_stop(struct net *net);
 void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
@@ -170,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector)
 
        node = tipc_node_find(net, addr);
 
-       if (likely(node))
+       if (likely(node)) {
                mtu = node->act_mtus[selector & 1];
-       else
+               tipc_node_put(node);
+       } else {
                mtu = MAX_MSG_SIZE;
+       }
 
        return mtu;
 }