- "enet_csr": Ethernet control and status register address space
- "ring_csr": Descriptor ring control and status register address space
- "ring_cmd": Descriptor ring command register address space
-- interrupts: Ethernet main interrupt
+- interrupts: Two interrupt specifiers can be specified.
+ - First is the Rx interrupt. This irq is mandatory.
+ - Second is the Tx completion interrupt.
+ This is supported only on SGMII based 1GbE and 10GbE interfaces.
- port-id: Port number (0 or 1)
- clocks: Reference to the clock entry.
- local-mac-address: MAC address assigned to this device
<0x0 0x1f200000 0x0 0Xc300>,
<0x0 0x1B000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
- interrupts = <0x0 0xA0 0x4>;
+ interrupts = <0x0 0xA0 0x4>,
+ <0x0 0xA1 0x4>;
dma-coherent;
clocks = <&sge0clk 0>;
local-mac-address = [00 00 00 00 00 00];
<0x0 0x1f200000 0x0 0Xc300>,
<0x0 0x1B000000 0x0 0X8000>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
- interrupts = <0x0 0xAC 0x4>;
+ interrupts = <0x0 0xAC 0x4>,
+ <0x0 0xAD 0x4>;
port-id = <1>;
dma-coherent;
clocks = <&sge1clk 0>;
<0x0 0x1f600000 0x0 0Xc300>,
<0x0 0x18000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
- interrupts = <0x0 0x60 0x4>;
+ interrupts = <0x0 0x60 0x4>,
+ <0x0 0x61 0x4>;
dma-coherent;
clocks = <&xge0clk 0>;
/* mac address will be overwritten by the bootloader */
.ndo_fix_features = bond_fix_features,
.ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink,
.ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink,
+ .ndo_features_check = passthru_features_check,
};
static const struct device_type bond_type = {
val |= 0x000c;
REG_WRITE(addr, 0x04, val);
- /* Port Control 1: disable trunking. Also, if this is the
- * CPU port, enable learn messages to be sent to this port.
- */
- REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
- /* Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the upstream port.
- */
- val = (p & 0xf) << 12;
- if (dsa_is_cpu_port(ds, p))
- val |= ds->phys_port_mask;
- else
- val |= 1 << dsa_upstream_port(ds);
- REG_WRITE(addr, 0x06, val);
-
- /* Default VLAN ID and priority: don't set a default VLAN
- * ID, and set the default packet priority to zero.
- */
- REG_WRITE(addr, 0x07, 0x0000);
-
/* Port Control 2: don't force a good FCS, set the maximum
* frame size to 10240 bytes, don't let the switch add or
* strip 802.1q tags, don't discard tagged or untagged frames
*/
REG_WRITE(addr, 0x19, 0x7654);
- return 0;
+ return mv88e6xxx_setup_port_common(ds, p);
}
static int mv88e6123_61_65_setup(struct dsa_switch *ds)
{
- struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int i;
int ret;
- mutex_init(&ps->smi_mutex);
- mutex_init(&ps->stats_mutex);
- mutex_init(&ps->phy_mutex);
+ ret = mv88e6xxx_setup_common(ds);
+ if (ret < 0)
+ return ret;
ret = mv88e6123_61_65_switch_reset(ds);
if (ret < 0)
#include <net/dsa.h>
#include "mv88e6xxx.h"
+/* Switch product IDs */
+#define ID_6171 0x1710
+#define ID_6172 0x1720
+
static char *mv88e6171_probe(struct device *host_dev, int sw_addr)
{
struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev);
ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
if (ret >= 0) {
- if ((ret & 0xfff0) == 0x1710)
+ if ((ret & 0xfff0) == ID_6171)
return "Marvell 88E6171";
- if ((ret & 0xfff0) == 0x1720)
+ if ((ret & 0xfff0) == ID_6172)
return "Marvell 88E6172";
}
val |= 0x000c;
REG_WRITE(addr, 0x04, val);
- /* Port Control 1: disable trunking. Also, if this is the
- * CPU port, enable learn messages to be sent to this port.
- */
- REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
- /* Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the upstream port.
- */
- val = (p & 0xf) << 12;
- if (dsa_is_cpu_port(ds, p))
- val |= ds->phys_port_mask;
- else
- val |= 1 << dsa_upstream_port(ds);
- REG_WRITE(addr, 0x06, val);
-
- /* Default VLAN ID and priority: don't set a default VLAN
- * ID, and set the default packet priority to zero.
- */
- REG_WRITE(addr, 0x07, 0x0000);
-
/* Port Control 2: don't force a good FCS, set the maximum
* frame size to 10240 bytes, don't let the switch add or
* strip 802.1q tags, don't discard tagged or untagged frames
*/
REG_WRITE(addr, 0x19, 0x7654);
- return 0;
+ return mv88e6xxx_setup_port_common(ds, p);
}
static int mv88e6171_setup(struct dsa_switch *ds)
{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
int i;
int ret;
- mutex_init(&ps->smi_mutex);
- mutex_init(&ps->stats_mutex);
+ ret = mv88e6xxx_setup_common(ds);
+ if (ret < 0)
+ return ret;
ret = mv88e6171_switch_reset(ds);
if (ret < 0)
return ret;
}
- mutex_init(&ps->phy_mutex);
-
return 0;
}
return ARRAY_SIZE(mv88e6171_hw_stats);
}
+static int mv88e6171_get_eee(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+ if (ps->id == ID_6172)
+ return mv88e6xxx_get_eee(ds, port, e);
+
+ return -EOPNOTSUPP;
+}
+
+static int mv88e6171_set_eee(struct dsa_switch *ds, int port,
+ struct phy_device *phydev, struct ethtool_eee *e)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+ if (ps->id == ID_6172)
+ return mv88e6xxx_set_eee(ds, port, phydev, e);
+
+ return -EOPNOTSUPP;
+}
+
struct dsa_switch_driver mv88e6171_switch_driver = {
.tag_protocol = DSA_TAG_PROTO_EDSA,
.priv_size = sizeof(struct mv88e6xxx_priv_state),
.get_strings = mv88e6171_get_strings,
.get_ethtool_stats = mv88e6171_get_ethtool_stats,
.get_sset_count = mv88e6171_get_sset_count,
+ .set_eee = mv88e6171_set_eee,
+ .get_eee = mv88e6171_get_eee,
#ifdef CONFIG_NET_DSA_HWMON
.get_temp = mv88e6xxx_get_temp,
#endif
.get_regs_len = mv88e6xxx_get_regs_len,
.get_regs = mv88e6xxx_get_regs,
+ .port_join_bridge = mv88e6xxx_join_bridge,
+ .port_leave_bridge = mv88e6xxx_leave_bridge,
+ .port_stp_update = mv88e6xxx_port_stp_update,
+ .fdb_add = mv88e6xxx_port_fdb_add,
+ .fdb_del = mv88e6xxx_port_fdb_del,
+ .fdb_getnext = mv88e6xxx_port_fdb_getnext,
};
MODULE_ALIAS("platform:mv88e6171");
val |= 0x000c;
REG_WRITE(addr, 0x04, val);
- /* Port Control 1: disable trunking. Also, if this is the
- * CPU port, enable learn messages to be sent to this port.
- */
- REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
- /* Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the upstream port.
- */
- val = (p & 0xf) << 12;
- if (dsa_is_cpu_port(ds, p))
- val |= ds->phys_port_mask;
- else
- val |= 1 << dsa_upstream_port(ds);
- REG_WRITE(addr, 0x06, val);
-
- /* Default VLAN ID and priority: don't set a default VLAN
- * ID, and set the default packet priority to zero.
- */
- REG_WRITE(addr, 0x07, 0x0000);
-
/* Port Control 2: don't force a good FCS, set the maximum
* frame size to 10240 bytes, don't let the switch add or
* strip 802.1q tags, don't discard tagged or untagged frames
*/
REG_WRITE(addr, 0x19, 0x7654);
- return 0;
+ return mv88e6xxx_setup_port_common(ds, p);
}
#ifdef CONFIG_NET_DSA_HWMON
int ret;
int i;
- mutex_init(&ps->smi_mutex);
- mutex_init(&ps->stats_mutex);
- mutex_init(&ps->phy_mutex);
- mutex_init(&ps->eeprom_mutex);
+ ret = mv88e6xxx_setup_common(ds);
+ if (ret < 0)
+ return ret;
- ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+ mutex_init(&ps->eeprom_mutex);
ret = mv88e6352_switch_reset(ds);
if (ret < 0)
.set_eeprom = mv88e6352_set_eeprom,
.get_regs_len = mv88e6xxx_get_regs_len,
.get_regs = mv88e6xxx_get_regs,
+ .port_join_bridge = mv88e6xxx_join_bridge,
+ .port_leave_bridge = mv88e6xxx_leave_bridge,
+ .port_stp_update = mv88e6xxx_port_stp_update,
+ .fdb_add = mv88e6xxx_port_fdb_add,
+ .fdb_del = mv88e6xxx_port_fdb_del,
+ .fdb_getnext = mv88e6xxx_port_fdb_getnext,
};
MODULE_ALIAS("platform:mv88e6352");
*/
#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/module.h>
return ret & 0xffff;
}
-int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
+/* Must be called with SMI mutex held */
+static int _mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
{
- struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev);
int ret;
if (bus == NULL)
return -EINVAL;
- mutex_lock(&ps->smi_mutex);
ret = __mv88e6xxx_reg_read(bus, ds->pd->sw_addr, addr, reg);
- mutex_unlock(&ps->smi_mutex);
-
if (ret < 0)
return ret;
return ret;
}
+int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->smi_mutex);
+ ret = _mv88e6xxx_reg_read(ds, addr, reg);
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
int reg, u16 val)
{
return 0;
}
-int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+/* Must be called with SMI mutex held */
+static int _mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg,
+ u16 val)
{
- struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev);
- int ret;
if (bus == NULL)
return -EINVAL;
dev_dbg(ds->master_dev, "-> addr: 0x%.2x reg: 0x%.2x val: 0x%.4x\n",
addr, reg, val);
+ return __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val);
+}
+
+int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
mutex_lock(&ps->smi_mutex);
- ret = __mv88e6xxx_reg_write(bus, ds->pd->sw_addr, addr, reg, val);
+ ret = _mv88e6xxx_reg_write(ds, addr, reg, val);
mutex_unlock(&ps->smi_mutex);
return ret;
return mv88e6xxx_wait(ds, REG_GLOBAL2, 0x14, 0x8000);
}
+/* Must be called with SMI lock held */
+static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask)
+{
+ unsigned long timeout = jiffies + HZ / 10;
+
+ while (time_before(jiffies, timeout)) {
+ int ret;
+
+ ret = _mv88e6xxx_reg_read(ds, reg, offset);
+ if (ret < 0)
+ return ret;
+ if (!(ret & mask))
+ return 0;
+
+ usleep_range(1000, 2000);
+ }
+ return -ETIMEDOUT;
+}
+
+/* Must be called with SMI lock held */
+static int _mv88e6xxx_atu_wait(struct dsa_switch *ds)
+{
+ return _mv88e6xxx_wait(ds, REG_GLOBAL, 0x0b, ATU_BUSY);
+}
+
int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum)
{
int ret;
return 0;
}
+static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int fid, u16 cmd)
+{
+ int ret;
+
+ ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid);
+ if (ret < 0)
+ return ret;
+
+ ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0b, cmd);
+ if (ret < 0)
+ return ret;
+
+ return _mv88e6xxx_atu_wait(ds);
+}
+
+static int _mv88e6xxx_flush_fid(struct dsa_switch *ds, int fid)
+{
+ int ret;
+
+ ret = _mv88e6xxx_atu_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ return _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_FLUSH_NONSTATIC_FID);
+}
+
+static int mv88e6xxx_set_port_state(struct dsa_switch *ds, int port, u8 state)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int reg, ret;
+ u8 oldstate;
+
+ mutex_lock(&ps->smi_mutex);
+
+ reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), 0x04);
+ if (reg < 0)
+ goto abort;
+
+ oldstate = reg & PSTATE_MASK;
+ if (oldstate != state) {
+ /* Flush forwarding database if we're moving a port
+ * from Learning or Forwarding state to Disabled or
+ * Blocking or Listening state.
+ */
+ if (oldstate >= PSTATE_LEARNING && state <= PSTATE_BLOCKING) {
+ ret = _mv88e6xxx_flush_fid(ds, ps->fid[port]);
+ if (ret)
+ goto abort;
+ }
+ reg = (reg & ~PSTATE_MASK) | state;
+ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x04, reg);
+ }
+
+abort:
+ mutex_unlock(&ps->smi_mutex);
+ return ret;
+}
+
+/* Must be called with smi lock held */
+static int _mv88e6xxx_update_port_config(struct dsa_switch *ds, int port)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ u8 fid = ps->fid[port];
+ u16 reg = fid << 12;
+
+ if (dsa_is_cpu_port(ds, port))
+ reg |= ds->phys_port_mask;
+ else
+ reg |= (ps->bridge_mask[fid] |
+ (1 << dsa_upstream_port(ds))) & ~(1 << port);
+
+ return _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x06, reg);
+}
+
+/* Must be called with smi lock held */
+static int _mv88e6xxx_update_bridge_config(struct dsa_switch *ds, int fid)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int port;
+ u32 mask;
+ int ret;
+
+ mask = ds->phys_port_mask;
+ while (mask) {
+ port = __ffs(mask);
+ mask &= ~(1 << port);
+ if (ps->fid[port] != fid)
+ continue;
+
+ ret = _mv88e6xxx_update_port_config(ds, port);
+ if (ret)
+ return ret;
+ }
+
+ return _mv88e6xxx_flush_fid(ds, fid);
+}
+
+/* Bridge handling functions */
+
+int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret = 0;
+ u32 nmask;
+ int fid;
+
+ /* If the bridge group is not empty, join that group.
+ * Otherwise create a new group.
+ */
+ fid = ps->fid[port];
+ nmask = br_port_mask & ~(1 << port);
+ if (nmask)
+ fid = ps->fid[__ffs(nmask)];
+
+ nmask = ps->bridge_mask[fid] | (1 << port);
+ if (nmask != br_port_mask) {
+ netdev_err(ds->ports[port],
+ "join: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n",
+ fid, br_port_mask, nmask);
+ return -EINVAL;
+ }
+
+ mutex_lock(&ps->smi_mutex);
+
+ ps->bridge_mask[fid] = br_port_mask;
+
+ if (fid != ps->fid[port]) {
+ ps->fid_mask |= 1 << ps->fid[port];
+ ps->fid[port] = fid;
+ ret = _mv88e6xxx_update_bridge_config(ds, fid);
+ }
+
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
+int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ u8 fid, newfid;
+ int ret;
+
+ fid = ps->fid[port];
+
+ if (ps->bridge_mask[fid] != br_port_mask) {
+ netdev_err(ds->ports[port],
+ "leave: Bridge port mask mismatch fid=%d mask=0x%x expected 0x%x\n",
+ fid, br_port_mask, ps->bridge_mask[fid]);
+ return -EINVAL;
+ }
+
+ /* If the port was the last port of a bridge, we are done.
+ * Otherwise assign a new fid to the port, and fix up
+ * the bridge configuration.
+ */
+ if (br_port_mask == (1 << port))
+ return 0;
+
+ mutex_lock(&ps->smi_mutex);
+
+ newfid = __ffs(ps->fid_mask);
+ ps->fid[port] = newfid;
+ ps->fid_mask &= (1 << newfid);
+ ps->bridge_mask[fid] &= ~(1 << port);
+ ps->bridge_mask[newfid] = 1 << port;
+
+ ret = _mv88e6xxx_update_bridge_config(ds, fid);
+ if (!ret)
+ ret = _mv88e6xxx_update_bridge_config(ds, newfid);
+
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
+int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int stp_state;
+
+ switch (state) {
+ case BR_STATE_DISABLED:
+ stp_state = PSTATE_DISABLED;
+ break;
+ case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
+ stp_state = PSTATE_BLOCKING;
+ break;
+ case BR_STATE_LEARNING:
+ stp_state = PSTATE_LEARNING;
+ break;
+ case BR_STATE_FORWARDING:
+ default:
+ stp_state = PSTATE_FORWARDING;
+ break;
+ }
+
+ netdev_dbg(ds->ports[port], "port state %d [%d]\n", state, stp_state);
+
+ /* mv88e6xxx_port_stp_update may be called with softirqs disabled,
+ * so we can not update the port state directly but need to schedule it.
+ */
+ ps->port_state[port] = stp_state;
+ set_bit(port, &ps->port_state_update_mask);
+ schedule_work(&ps->bridge_work);
+
+ return 0;
+}
+
+static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
+ const unsigned char *addr)
+{
+ int i, ret;
+
+ for (i = 0; i < 3; i++) {
+ ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0d + i,
+ (addr[i * 2] << 8) | addr[i * 2 + 1]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr)
+{
+ int i, ret;
+
+ for (i = 0; i < 3; i++) {
+ ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0d + i);
+ if (ret < 0)
+ return ret;
+ addr[i * 2] = ret >> 8;
+ addr[i * 2 + 1] = ret & 0xff;
+ }
+
+ return 0;
+}
+
+static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port,
+ const unsigned char *addr, int state)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ u8 fid = ps->fid[port];
+ int ret;
+
+ ret = _mv88e6xxx_atu_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ ret = __mv88e6xxx_write_addr(ds, addr);
+ if (ret < 0)
+ return ret;
+
+ ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x0c,
+ (0x10 << port) | state);
+ if (ret)
+ return ret;
+
+ ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_LOAD_FID);
+
+ return ret;
+}
+
+int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ int state = is_multicast_ether_addr(addr) ?
+ FDB_STATE_MC_STATIC : FDB_STATE_STATIC;
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->smi_mutex);
+ ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, state);
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
+int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->smi_mutex);
+ ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, FDB_STATE_UNUSED);
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
+static int __mv88e6xxx_port_getnext(struct dsa_switch *ds, int port,
+ unsigned char *addr, bool *is_static)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ u8 fid = ps->fid[port];
+ int ret, state;
+
+ ret = _mv88e6xxx_atu_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ ret = __mv88e6xxx_write_addr(ds, addr);
+ if (ret < 0)
+ return ret;
+
+ do {
+ ret = _mv88e6xxx_atu_cmd(ds, fid, ATU_CMD_GETNEXT_FID);
+ if (ret < 0)
+ return ret;
+
+ ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x0c);
+ if (ret < 0)
+ return ret;
+ state = ret & FDB_STATE_MASK;
+ if (state == FDB_STATE_UNUSED)
+ return -ENOENT;
+ } while (!(((ret >> 4) & 0xff) & (1 << port)));
+
+ ret = __mv88e6xxx_read_addr(ds, addr);
+ if (ret < 0)
+ return ret;
+
+ *is_static = state == (is_multicast_ether_addr(addr) ?
+ FDB_STATE_MC_STATIC : FDB_STATE_STATIC);
+
+ return 0;
+}
+
+/* get next entry for port */
+int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
+ unsigned char *addr, bool *is_static)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->smi_mutex);
+ ret = __mv88e6xxx_port_getnext(ds, port, addr, is_static);
+ mutex_unlock(&ps->smi_mutex);
+
+ return ret;
+}
+
+static void mv88e6xxx_bridge_work(struct work_struct *work)
+{
+ struct mv88e6xxx_priv_state *ps;
+ struct dsa_switch *ds;
+ int port;
+
+ ps = container_of(work, struct mv88e6xxx_priv_state, bridge_work);
+ ds = ((struct dsa_switch *)ps) - 1;
+
+ while (ps->port_state_update_mask) {
+ port = __ffs(ps->port_state_update_mask);
+ clear_bit(port, &ps->port_state_update_mask);
+ mv88e6xxx_set_port_state(ds, port, ps->port_state[port]);
+ }
+}
+
+int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret, fid;
+
+ mutex_lock(&ps->smi_mutex);
+
+ /* Port Control 1: disable trunking, disable sending
+ * learning messages to this port.
+ */
+ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x05, 0x0000);
+ if (ret)
+ goto abort;
+
+ /* Port based VLAN map: give each port its own address
+ * database, allow the CPU port to talk to each of the 'real'
+ * ports, and allow each of the 'real' ports to only talk to
+ * the upstream port.
+ */
+ fid = __ffs(ps->fid_mask);
+ ps->fid[port] = fid;
+ ps->fid_mask &= ~(1 << fid);
+
+ if (!dsa_is_cpu_port(ds, port))
+ ps->bridge_mask[fid] = 1 << port;
+
+ ret = _mv88e6xxx_update_port_config(ds, port);
+ if (ret)
+ goto abort;
+
+ /* Default VLAN ID and priority: don't set a default VLAN
+ * ID, and set the default packet priority to zero.
+ */
+ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), 0x07, 0x0000);
+abort:
+ mutex_unlock(&ps->smi_mutex);
+ return ret;
+}
+
+int mv88e6xxx_setup_common(struct dsa_switch *ds)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+ mutex_init(&ps->smi_mutex);
+ mutex_init(&ps->stats_mutex);
+ mutex_init(&ps->phy_mutex);
+
+ ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+
+ ps->fid_mask = (1 << DSA_MAX_PORTS) - 1;
+
+ INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work);
+
+ return 0;
+}
+
static int __init mv88e6xxx_init(void)
{
#if IS_ENABLED(CONFIG_NET_DSA_MV88E6131)
#define REG_GLOBAL 0x1b
#define REG_GLOBAL2 0x1c
+/* ATU commands */
+
+#define ATU_BUSY 0x8000
+
+#define ATU_CMD_LOAD_FID (ATU_BUSY | 0x3000)
+#define ATU_CMD_GETNEXT_FID (ATU_BUSY | 0x4000)
+#define ATU_CMD_FLUSH_NONSTATIC_FID (ATU_BUSY | 0x6000)
+
+/* port states */
+
+#define PSTATE_MASK 0x03
+#define PSTATE_DISABLED 0x00
+#define PSTATE_BLOCKING 0x01
+#define PSTATE_LEARNING 0x02
+#define PSTATE_FORWARDING 0x03
+
+/* FDB states */
+
+#define FDB_STATE_MASK 0x0f
+
+#define FDB_STATE_UNUSED 0x00
+#define FDB_STATE_MC_STATIC 0x07 /* static multicast */
+#define FDB_STATE_STATIC 0x0e /* static unicast */
+
struct mv88e6xxx_priv_state {
/* When using multi-chip addressing, this mutex protects
* access to the indirect access registers. (In single-chip
struct mutex eeprom_mutex;
int id; /* switch product id */
+
+ /* hw bridging */
+
+ u32 fid_mask;
+ u8 fid[DSA_MAX_PORTS];
+ u16 bridge_mask[DSA_MAX_PORTS];
+
+ unsigned long port_state_update_mask;
+ u8 port_state[DSA_MAX_PORTS];
+
+ struct work_struct bridge_work;
};
struct mv88e6xxx_hw_stat {
int reg;
};
+int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port);
+int mv88e6xxx_setup_common(struct dsa_switch *ds);
int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
struct phy_device *phydev, struct ethtool_eee *e);
+int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask);
+int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask);
+int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state);
+int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
+int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
+int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
+ unsigned char *addr, bool *is_static);
extern struct dsa_switch_driver mv88e6131_switch_driver;
extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
{
struct xgene_enet_pdata *pdata = netdev_priv(ndev);
struct device *dev = ndev_to_dev(ndev);
+ struct xgene_enet_desc_ring *ring;
int ret;
- ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq,
- IRQF_SHARED, ndev->name, pdata->rx_ring);
- if (ret) {
- netdev_err(ndev, "rx%d interrupt request failed\n",
- pdata->rx_ring->irq);
+ ring = pdata->rx_ring;
+ ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
+ IRQF_SHARED, ring->irq_name, ring);
+ if (ret)
+ netdev_err(ndev, "Failed to request irq %s\n", ring->irq_name);
+
+ if (pdata->cq_cnt) {
+ ring = pdata->tx_ring->cp_ring;
+ ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
+ IRQF_SHARED, ring->irq_name, ring);
+ if (ret) {
+ netdev_err(ndev, "Failed to request irq %s\n",
+ ring->irq_name);
+ }
}
return ret;
pdata = netdev_priv(ndev);
dev = ndev_to_dev(ndev);
devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+
+ if (pdata->cq_cnt) {
+ devm_free_irq(dev, pdata->tx_ring->cp_ring->irq,
+ pdata->tx_ring->cp_ring);
+ }
+}
+
+static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata)
+{
+ struct napi_struct *napi;
+
+ napi = &pdata->rx_ring->napi;
+ napi_enable(napi);
+
+ if (pdata->cq_cnt) {
+ napi = &pdata->tx_ring->cp_ring->napi;
+ napi_enable(napi);
+ }
+}
+
+static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata)
+{
+ struct napi_struct *napi;
+
+ napi = &pdata->rx_ring->napi;
+ napi_disable(napi);
+
+ if (pdata->cq_cnt) {
+ napi = &pdata->tx_ring->cp_ring->napi;
+ napi_disable(napi);
+ }
}
static int xgene_enet_open(struct net_device *ndev)
ret = xgene_enet_register_irq(ndev);
if (ret)
return ret;
- napi_enable(&pdata->rx_ring->napi);
+ xgene_enet_napi_enable(pdata);
if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
phy_start(pdata->phy_dev);
else
cancel_delayed_work_sync(&pdata->link_work);
- napi_disable(&pdata->rx_ring->napi);
+ xgene_enet_napi_disable(pdata);
xgene_enet_free_irq(ndev);
xgene_enet_process_ring(pdata->rx_ring, -1);
if (ring) {
if (ring->cp_ring && ring->cp_ring->cp_skb)
devm_kfree(dev, ring->cp_ring->cp_skb);
+ if (ring->cp_ring && pdata->cq_cnt)
+ xgene_enet_free_desc_ring(ring->cp_ring);
xgene_enet_free_desc_ring(ring);
}
rx_ring->nbufpool = NUM_BUFPOOL;
rx_ring->buf_pool = buf_pool;
rx_ring->irq = pdata->rx_irq;
+ if (!pdata->cq_cnt) {
+ snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc",
+ ndev->name);
+ } else {
+ snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx", ndev->name);
+ }
buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots,
sizeof(struct sk_buff *), GFP_KERNEL);
if (!buf_pool->rx_skb) {
}
pdata->tx_ring = tx_ring;
- cp_ring = pdata->rx_ring;
+ if (!pdata->cq_cnt) {
+ cp_ring = pdata->rx_ring;
+ } else {
+ /* allocate tx completion descriptor ring */
+ ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++);
+ cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++,
+ RING_CFGSIZE_16KB,
+ ring_id);
+ if (!cp_ring) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cp_ring->irq = pdata->txc_irq;
+ snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc", ndev->name);
+ }
+
cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots,
sizeof(struct sk_buff *), GFP_KERNEL);
if (!cp_ring->cp_skb) {
return -ENOMEM;
}
- ret = platform_get_irq(pdev, 0);
- if (ret <= 0) {
- dev_err(dev, "Unable to get ENET Rx IRQ\n");
- ret = ret ? : -ENXIO;
- return ret;
- }
- pdata->rx_irq = ret;
-
ret = xgene_get_port_id(dev, pdata);
if (ret)
return ret;
return -ENODEV;
}
+ ret = platform_get_irq(pdev, 0);
+ if (ret <= 0) {
+ dev_err(dev, "Unable to get ENET Rx IRQ\n");
+ ret = ret ? : -ENXIO;
+ return ret;
+ }
+ pdata->rx_irq = ret;
+
+ if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII) {
+ ret = platform_get_irq(pdev, 1);
+ if (ret <= 0) {
+ dev_err(dev, "Unable to get ENET Tx completion IRQ\n");
+ ret = ret ? : -ENXIO;
+ return ret;
+ }
+ pdata->txc_irq = ret;
+ }
+
pdata->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(pdata->clk)) {
/* Firmware may have set up the clock already. */
pdata->mac_ops = &xgene_sgmac_ops;
pdata->port_ops = &xgene_sgport_ops;
pdata->rm = RM1;
+ pdata->cq_cnt = XGENE_MAX_TXC_RINGS;
break;
default:
pdata->mac_ops = &xgene_xgmac_ops;
pdata->port_ops = &xgene_xgport_ops;
pdata->rm = RM0;
+ pdata->cq_cnt = XGENE_MAX_TXC_RINGS;
break;
}
}
+static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata)
+{
+ struct napi_struct *napi;
+
+ napi = &pdata->rx_ring->napi;
+ netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT);
+
+ if (pdata->cq_cnt) {
+ napi = &pdata->tx_ring->cp_ring->napi;
+ netif_napi_add(pdata->ndev, napi, xgene_enet_napi,
+ NAPI_POLL_WEIGHT);
+ }
+}
+
+static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata)
+{
+ struct napi_struct *napi;
+
+ napi = &pdata->rx_ring->napi;
+ netif_napi_del(napi);
+
+ if (pdata->cq_cnt) {
+ napi = &pdata->tx_ring->cp_ring->napi;
+ netif_napi_del(napi);
+ }
+}
+
static int xgene_enet_probe(struct platform_device *pdev)
{
struct net_device *ndev;
struct xgene_enet_pdata *pdata;
struct device *dev = &pdev->dev;
- struct napi_struct *napi;
struct xgene_mac_ops *mac_ops;
int ret;
if (ret)
goto err;
- napi = &pdata->rx_ring->napi;
- netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT);
+ xgene_enet_napi_add(pdata);
mac_ops = pdata->mac_ops;
if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
ret = xgene_enet_mdio_config(pdata);
mac_ops->rx_disable(pdata);
mac_ops->tx_disable(pdata);
- netif_napi_del(&pdata->rx_ring->napi);
+ xgene_enet_napi_del(pdata);
xgene_enet_mdio_remove(pdata);
xgene_enet_delete_desc_rings(pdata);
unregister_netdev(ndev);
#define START_BP_BUFNUM_1 0x2A
#define START_RING_NUM_1 264
+#define IRQ_ID_SIZE 16
+#define XGENE_MAX_TXC_RINGS 1
+
#define PHY_POLL_LINK_ON (10 * HZ)
#define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5)
u16 tail;
u16 slots;
u16 irq;
+ char irq_name[IRQ_ID_SIZE];
u32 size;
u32 state[NUM_RING_CONFIG];
void __iomem *cmd_base;
u32 cp_qcnt_hi;
u32 cp_qcnt_low;
u32 rx_irq;
+ u32 txc_irq;
+ u8 cq_cnt;
void __iomem *eth_csr_addr;
void __iomem *eth_ring_if_addr;
void __iomem *eth_diag_csr_addr;
struct net_device *dev,
netdev_features_t features)
{
+ features = vlan_features_check(skb, features);
return vxlan_features_check(skb, features);
}
dma_unmap_addr_set(cb, dma_addr, 0);
}
-static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring)
{
- bcmgenet_intrl2_0_writel(priv,
+ bcmgenet_intrl2_0_writel(ring->priv,
+ UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE,
+ INTRL2_CPU_MASK_SET);
+}
+
+static inline void bcmgenet_rx_ring16_int_enable(struct bcmgenet_rx_ring *ring)
+{
+ bcmgenet_intrl2_0_writel(ring->priv,
+ UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE,
+ INTRL2_CPU_MASK_CLEAR);
+}
+
+static inline void bcmgenet_rx_ring_int_disable(struct bcmgenet_rx_ring *ring)
+{
+ bcmgenet_intrl2_1_writel(ring->priv,
+ 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index),
+ INTRL2_CPU_MASK_SET);
+}
+
+static inline void bcmgenet_rx_ring_int_enable(struct bcmgenet_rx_ring *ring)
+{
+ bcmgenet_intrl2_1_writel(ring->priv,
+ 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index),
+ INTRL2_CPU_MASK_CLEAR);
+}
+
+static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_tx_ring *ring)
+{
+ bcmgenet_intrl2_0_writel(ring->priv,
UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE,
INTRL2_CPU_MASK_SET);
}
-static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_tx_ring *ring)
{
- bcmgenet_intrl2_0_writel(priv,
+ bcmgenet_intrl2_0_writel(ring->priv,
UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE,
INTRL2_CPU_MASK_CLEAR);
}
-static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_tx_ring *ring)
{
- bcmgenet_intrl2_1_writel(priv, (1 << ring->index),
+ bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index,
INTRL2_CPU_MASK_CLEAR);
- priv->int1_mask &= ~(1 << ring->index);
}
-static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *ring)
+static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring)
{
- bcmgenet_intrl2_1_writel(priv, (1 << ring->index),
+ bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index,
INTRL2_CPU_MASK_SET);
- priv->int1_mask |= (1 << ring->index);
}
/* Unlocked version of the reclaim routine */
if (work_done == 0) {
napi_complete(napi);
- ring->int_enable(ring->priv, ring);
+ ring->int_enable(ring);
return 0;
}
/* bcmgenet_desc_rx - descriptor based rx process.
* this could be called from bottom half, or from NAPI polling method.
*/
-static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv,
- unsigned int index,
+static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
unsigned int budget)
{
- struct bcmgenet_rx_ring *ring = &priv->rx_rings[index];
+ struct bcmgenet_priv *priv = ring->priv;
struct net_device *dev = priv->dev;
struct enet_cb *cb;
struct sk_buff *skb;
unsigned int discards;
unsigned int chksum_ok = 0;
- p_index = bcmgenet_rdma_ring_readl(priv, index, RDMA_PROD_INDEX);
+ p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX);
discards = (p_index >> DMA_P_INDEX_DISCARD_CNT_SHIFT) &
DMA_P_INDEX_DISCARD_CNT_MASK;
/* Clear HW register when we reach 75% of maximum 0xFFFF */
if (ring->old_discards >= 0xC000) {
ring->old_discards = 0;
- bcmgenet_rdma_ring_writel(priv, index, 0,
+ bcmgenet_rdma_ring_writel(priv, ring->index, 0,
RDMA_PROD_INDEX);
}
}
dev->stats.multicast++;
/* Notify kernel */
- napi_gro_receive(&priv->napi, skb);
+ napi_gro_receive(&ring->napi, skb);
netif_dbg(priv, rx_status, dev, "pushed up to kernel\n");
next:
ring->read_ptr = ring->cb_ptr;
ring->c_index = (ring->c_index + 1) & DMA_C_INDEX_MASK;
- bcmgenet_rdma_ring_writel(priv, index, ring->c_index, RDMA_CONS_INDEX);
+ bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
}
return rxpktprocessed;
}
+/* Rx NAPI polling method */
+static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmgenet_rx_ring *ring = container_of(napi,
+ struct bcmgenet_rx_ring, napi);
+ unsigned int work_done;
+
+ work_done = bcmgenet_desc_rx(ring, budget);
+
+ if (work_done < budget) {
+ napi_complete(napi);
+ ring->int_enable(ring);
+ }
+
+ return work_done;
+}
+
/* Assign skb to RX DMA descriptor. */
static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
struct bcmgenet_rx_ring *ring)
{
struct device *kdev = &priv->pdev->dev;
int ret;
- u32 reg, cpu_mask_clear;
- int index;
+ u32 reg;
+ u32 int0_enable = 0;
+ u32 int1_enable = 0;
+ int i;
dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n");
bcmgenet_intr_disable(priv);
- cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
+ /* Enable Rx default queue 16 interrupts */
+ int0_enable |= (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE);
- dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__);
+ /* Enable Tx default queue 16 interrupts */
+ int0_enable |= (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE);
/* Monitor cable plug/unplugged event for internal PHY */
if (phy_is_internal(priv->phydev)) {
- cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
+ int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
} else if (priv->ext_phy) {
- cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
+ int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP);
} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
reg = bcmgenet_bp_mc_get(priv);
reg |= BIT(priv->hw_params->bp_in_en_shift);
/* Enable MDIO interrupts on GENET v3+ */
if (priv->hw_params->flags & GENET_HAS_MDIO_INTR)
- cpu_mask_clear |= UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR;
+ int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+
+ /* Enable Rx priority queue interrupts */
+ for (i = 0; i < priv->hw_params->rx_queues; ++i)
+ int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i));
- bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR);
+ /* Enable Tx priority queue interrupts */
+ for (i = 0; i < priv->hw_params->tx_queues; ++i)
+ int1_enable |= (1 << i);
- for (index = 0; index < priv->hw_params->tx_queues; index++)
- bcmgenet_intrl2_1_writel(priv, (1 << index),
- INTRL2_CPU_MASK_CLEAR);
+ bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
+ bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
/* Enable rx/tx engine.*/
dev_dbg(kdev, "done init umac\n");
spin_lock_init(&ring->lock);
ring->priv = priv;
- netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
ring->index = index;
if (index == DESC_INDEX) {
ring->queue = 0;
TDMA_WRITE_PTR);
bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
DMA_END_ADDR);
-
- napi_enable(&ring->napi);
-}
-
-static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv,
- unsigned int index)
-{
- struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
-
- napi_disable(&ring->napi);
- netif_napi_del(&ring->napi);
}
/* Initialize a RDMA ring */
u32 words_per_bd = WORDS_PER_BD(priv);
int ret;
+ ring->priv = priv;
ring->index = index;
+ if (index == DESC_INDEX) {
+ ring->int_enable = bcmgenet_rx_ring16_int_enable;
+ ring->int_disable = bcmgenet_rx_ring16_int_disable;
+ } else {
+ ring->int_enable = bcmgenet_rx_ring_int_enable;
+ ring->int_disable = bcmgenet_rx_ring_int_disable;
+ }
ring->cbs = priv->rx_cbs + start_ptr;
ring->size = size;
ring->c_index = 0;
return ret;
}
+static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_tx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+ ring = &priv->tx_rings[i];
+ netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
+ }
+
+ ring = &priv->tx_rings[DESC_INDEX];
+ netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
+}
+
+static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_tx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+ ring = &priv->tx_rings[i];
+ napi_enable(&ring->napi);
+ }
+
+ ring = &priv->tx_rings[DESC_INDEX];
+ napi_enable(&ring->napi);
+}
+
+static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_tx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+ ring = &priv->tx_rings[i];
+ napi_disable(&ring->napi);
+ }
+
+ ring = &priv->tx_rings[DESC_INDEX];
+ napi_disable(&ring->napi);
+}
+
+static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_tx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->tx_queues; ++i) {
+ ring = &priv->tx_rings[i];
+ netif_napi_del(&ring->napi);
+ }
+
+ ring = &priv->tx_rings[DESC_INDEX];
+ netif_napi_del(&ring->napi);
+}
+
/* Initialize Tx queues
*
* Queues 0-3 are priority-based, each one has 32 descriptors,
bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1);
bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2);
+ /* Initialize Tx NAPI */
+ bcmgenet_init_tx_napi(priv);
+
/* Enable Tx queues */
bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG);
bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL);
}
+static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_rx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+ ring = &priv->rx_rings[i];
+ netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
+ }
+
+ ring = &priv->rx_rings[DESC_INDEX];
+ netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
+}
+
+static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_rx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+ ring = &priv->rx_rings[i];
+ napi_enable(&ring->napi);
+ }
+
+ ring = &priv->rx_rings[DESC_INDEX];
+ napi_enable(&ring->napi);
+}
+
+static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_rx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+ ring = &priv->rx_rings[i];
+ napi_disable(&ring->napi);
+ }
+
+ ring = &priv->rx_rings[DESC_INDEX];
+ napi_disable(&ring->napi);
+}
+
+static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
+{
+ unsigned int i;
+ struct bcmgenet_rx_ring *ring;
+
+ for (i = 0; i < priv->hw_params->rx_queues; ++i) {
+ ring = &priv->rx_rings[i];
+ netif_napi_del(&ring->napi);
+ }
+
+ ring = &priv->rx_rings[DESC_INDEX];
+ netif_napi_del(&ring->napi);
+}
+
/* Initialize Rx queues
*
* Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be
ring_cfg |= (1 << DESC_INDEX);
dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT));
+ /* Initialize Rx NAPI */
+ bcmgenet_init_rx_napi(priv);
+
/* Enable rings */
bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG);
static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
{
- int i;
-
- bcmgenet_fini_tx_ring(priv, DESC_INDEX);
-
- for (i = 0; i < priv->hw_params->tx_queues; i++)
- bcmgenet_fini_tx_ring(priv, i);
+ bcmgenet_fini_rx_napi(priv);
+ bcmgenet_fini_tx_napi(priv);
__bcmgenet_fini_dma(priv);
}
netif_dbg(priv, hw, priv->dev, "%s\n", __func__);
- /* Init rDma */
- bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
-
/* Initialize common Rx ring structures */
priv->rx_bds = priv->base + priv->hw_params->rdma_offset;
priv->num_rx_bds = TOTAL_DESC;
cb->bd_addr = priv->rx_bds + i * DMA_DESC_SIZE;
}
- /* Initialize Rx queues */
- ret = bcmgenet_init_rx_queues(priv->dev);
- if (ret) {
- netdev_err(priv->dev, "failed to initialize Rx queues\n");
- bcmgenet_free_rx_buffers(priv);
- kfree(priv->rx_cbs);
- return ret;
- }
-
- /* Init tDma */
- bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
-
/* Initialize common TX ring structures */
priv->tx_bds = priv->base + priv->hw_params->tdma_offset;
priv->num_tx_bds = TOTAL_DESC;
priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb),
GFP_KERNEL);
if (!priv->tx_cbs) {
- __bcmgenet_fini_dma(priv);
+ kfree(priv->rx_cbs);
return -ENOMEM;
}
cb->bd_addr = priv->tx_bds + i * DMA_DESC_SIZE;
}
- /* Initialize Tx queues */
- bcmgenet_init_tx_queues(priv->dev);
-
- return 0;
-}
+ /* Init rDma */
+ bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
-/* NAPI polling method*/
-static int bcmgenet_poll(struct napi_struct *napi, int budget)
-{
- struct bcmgenet_priv *priv = container_of(napi,
- struct bcmgenet_priv, napi);
- unsigned int work_done;
+ /* Initialize Rx queues */
+ ret = bcmgenet_init_rx_queues(priv->dev);
+ if (ret) {
+ netdev_err(priv->dev, "failed to initialize Rx queues\n");
+ bcmgenet_free_rx_buffers(priv);
+ kfree(priv->rx_cbs);
+ kfree(priv->tx_cbs);
+ return ret;
+ }
- work_done = bcmgenet_desc_rx(priv, DESC_INDEX, budget);
+ /* Init tDma */
+ bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE);
- if (work_done < budget) {
- napi_complete(napi);
- bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE,
- INTRL2_CPU_MASK_CLEAR);
- }
+ /* Initialize Tx queues */
+ bcmgenet_init_tx_queues(priv->dev);
- return work_done;
+ return 0;
}
/* Interrupt bottom half */
}
}
-/* bcmgenet_isr1: interrupt handler for ring buffer. */
+/* bcmgenet_isr1: handle Rx and Tx priority queues */
static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
{
struct bcmgenet_priv *priv = dev_id;
- struct bcmgenet_tx_ring *ring;
+ struct bcmgenet_rx_ring *rx_ring;
+ struct bcmgenet_tx_ring *tx_ring;
unsigned int index;
/* Save irq status for bottom-half processing. */
priv->irq1_stat =
bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
+
/* clear interrupts */
bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
"%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
- /* Check the MBDONE interrupts.
- * packet is done, reclaim descriptors
- */
+ /* Check Rx priority queue interrupts */
+ for (index = 0; index < priv->hw_params->rx_queues; index++) {
+ if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+ continue;
+
+ rx_ring = &priv->rx_rings[index];
+
+ if (likely(napi_schedule_prep(&rx_ring->napi))) {
+ rx_ring->int_disable(rx_ring);
+ __napi_schedule(&rx_ring->napi);
+ }
+ }
+
+ /* Check Tx priority queue interrupts */
for (index = 0; index < priv->hw_params->tx_queues; index++) {
if (!(priv->irq1_stat & BIT(index)))
continue;
- ring = &priv->tx_rings[index];
+ tx_ring = &priv->tx_rings[index];
- if (likely(napi_schedule_prep(&ring->napi))) {
- ring->int_disable(priv, ring);
- __napi_schedule(&ring->napi);
+ if (likely(napi_schedule_prep(&tx_ring->napi))) {
+ tx_ring->int_disable(tx_ring);
+ __napi_schedule(&tx_ring->napi);
}
}
return IRQ_HANDLED;
}
-/* bcmgenet_isr0: Handle various interrupts. */
+/* bcmgenet_isr0: handle Rx and Tx default queues + other stuff */
static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
{
struct bcmgenet_priv *priv = dev_id;
+ struct bcmgenet_rx_ring *rx_ring;
+ struct bcmgenet_tx_ring *tx_ring;
/* Save irq status for bottom-half processing. */
priv->irq0_stat =
bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
+
/* clear interrupts */
bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
"IRQ=0x%x\n", priv->irq0_stat);
if (priv->irq0_stat & (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE)) {
- /* We use NAPI(software interrupt throttling, if
- * Rx Descriptor throttling is not used.
- * Disable interrupt, will be enabled in the poll method.
- */
- if (likely(napi_schedule_prep(&priv->napi))) {
- bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE,
- INTRL2_CPU_MASK_SET);
- __napi_schedule(&priv->napi);
+ rx_ring = &priv->rx_rings[DESC_INDEX];
+
+ if (likely(napi_schedule_prep(&rx_ring->napi))) {
+ rx_ring->int_disable(rx_ring);
+ __napi_schedule(&rx_ring->napi);
}
}
- if (priv->irq0_stat &
- (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
- struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX];
- if (likely(napi_schedule_prep(&ring->napi))) {
- ring->int_disable(priv, ring);
- __napi_schedule(&ring->napi);
+ if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
+ tx_ring = &priv->tx_rings[DESC_INDEX];
+
+ if (likely(napi_schedule_prep(&tx_ring->napi))) {
+ tx_ring->int_disable(tx_ring);
+ __napi_schedule(&tx_ring->napi);
}
}
+
if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
UMAC_IRQ_PHY_DET_F |
UMAC_IRQ_LINK_UP |
struct bcmgenet_priv *priv = netdev_priv(dev);
/* Start the network engine */
- napi_enable(&priv->napi);
+ bcmgenet_enable_rx_napi(priv);
+ bcmgenet_enable_tx_napi(priv);
umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
struct bcmgenet_priv *priv = netdev_priv(dev);
netif_tx_stop_all_queues(dev);
- napi_disable(&priv->napi);
phy_stop(priv->phydev);
-
bcmgenet_intr_disable(priv);
+ bcmgenet_disable_rx_napi(priv);
+ bcmgenet_disable_tx_napi(priv);
/* Wait for pending work items to complete. Since interrupts are
* disabled no new work will be scheduled.
dev->watchdog_timeo = 2 * HZ;
dev->ethtool_ops = &bcmgenet_ethtool_ops;
dev->netdev_ops = &bcmgenet_netdev_ops;
- netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64);
priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT);
#define UMAC_IRQ_MDIO_DONE (1 << 23)
#define UMAC_IRQ_MDIO_ERROR (1 << 24)
+/* INTRL2 instance 1 definitions */
+#define UMAC_IRQ1_TX_INTR_MASK 0xFFFF
+#define UMAC_IRQ1_RX_INTR_MASK 0xFFFF
+#define UMAC_IRQ1_RX_INTR_SHIFT 16
+
/* Register block offsets */
#define GENET_SYS_OFF 0x0000
#define GENET_GR_BRIDGE_OFF 0x0040
unsigned int prod_index; /* Tx ring producer index SW copy */
unsigned int cb_ptr; /* Tx ring initial CB ptr */
unsigned int end_ptr; /* Tx ring end CB ptr */
- void (*int_enable)(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *);
- void (*int_disable)(struct bcmgenet_priv *priv,
- struct bcmgenet_tx_ring *);
+ void (*int_enable)(struct bcmgenet_tx_ring *);
+ void (*int_disable)(struct bcmgenet_tx_ring *);
struct bcmgenet_priv *priv;
};
struct bcmgenet_rx_ring {
+ struct napi_struct napi; /* Rx NAPI struct */
unsigned int index; /* Rx ring index */
struct enet_cb *cbs; /* Rx ring buffer control block */
unsigned int size; /* Rx ring size */
unsigned int cb_ptr; /* Rx ring initial CB ptr */
unsigned int end_ptr; /* Rx ring end CB ptr */
unsigned int old_discards;
+ void (*int_enable)(struct bcmgenet_rx_ring *);
+ void (*int_disable)(struct bcmgenet_rx_ring *);
+ struct bcmgenet_priv *priv;
};
/* device context */
void __iomem *base;
enum bcmgenet_version version;
struct net_device *dev;
- u32 int0_mask;
- u32 int1_mask;
-
- /* NAPI for descriptor based rx */
- struct napi_struct napi ____cacheline_aligned;
/* transmit variables */
void __iomem *tx_bds;
if ((sof != FC_SOF_I3) && (sof != FC_SOF_N3)) {
dev_err(adap->pdev_dev, "Unsupported SOF 0x%x\n", sof);
- return 0;
+ return false;
}
skb_copy_bits(skb, skb->len - 4, &eof, 1);
if ((eof != FC_EOF_N) && (eof != FC_EOF_T)) {
dev_err(adap->pdev_dev, "Unsupported EOF 0x%x\n", eof);
- return 0;
+ return false;
}
- return 1;
+ return true;
}
/**
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/u64_stats_sync.h>
+#include <linux/cpumask.h>
#include "be_hw.h"
#include "be_roce.h"
-#define DRV_VER "10.4u"
+#define DRV_VER "10.6.0.1"
#define DRV_NAME "be2net"
#define BE_NAME "Emulex BladeEngine2"
#define BE3_NAME "Emulex BladeEngine3"
u16 spurious_intr;
struct napi_struct napi;
struct be_adapter *adapter;
+ cpumask_var_t affinity_mask;
#ifdef CONFIG_NET_RX_BUSY_POLL
#define BE_EQ_IDLE 0
napi_hash_del(&eqo->napi);
netif_napi_del(&eqo->napi);
}
+ free_cpumask_var(eqo->affinity_mask);
be_queue_free(adapter, &eqo->q);
}
}
adapter->cfg_num_qs);
for_all_evt_queues(adapter, eqo, i) {
+ if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
+ eqo->affinity_mask);
+
netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
BE_NAPI_WEIGHT);
napi_hash_add(&eqo->napi);
static int be_tx_qs_create(struct be_adapter *adapter)
{
- struct be_queue_info *cq, *eq;
+ struct be_queue_info *cq;
struct be_tx_obj *txo;
+ struct be_eq_obj *eqo;
int status, i;
adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
/* If num_evt_qs is less than num_tx_qs, then more than
* one txq share an eq
*/
- eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
- status = be_cmd_cq_create(adapter, cq, eq, false, 3);
+ eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
+ status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
if (status)
return status;
status = be_cmd_txq_create(adapter, txo);
if (status)
return status;
+
+ netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
+ eqo->idx);
}
dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
if (status)
goto err_msix;
+
+ irq_set_affinity_hint(vec, eqo->affinity_mask);
}
return 0;
{
struct net_device *netdev = adapter->netdev;
struct be_eq_obj *eqo;
- int i;
+ int i, vec;
if (!adapter->isr_registered)
return;
}
/* MSIx */
- for_all_evt_queues(adapter, eqo, i)
- free_irq(be_msix_vec_get(adapter, eqo), eqo);
+ for_all_evt_queues(adapter, eqo, i) {
+ vec = be_msix_vec_get(adapter, eqo);
+ irq_set_affinity_hint(vec, NULL);
+ free_irq(vec, eqo);
+ }
done:
adapter->isr_registered = false;
#endif
.ndo_fix_features = igb_fix_features,
.ndo_set_features = igb_set_features,
+ .ndo_features_check = passthru_features_check,
};
/**
struct net_device *dev,
netdev_features_t features)
{
+ features = vlan_features_check(skb, features);
return vxlan_features_check(skb, features);
}
#endif
struct net_device *dev,
netdev_features_t features)
{
+ features = vlan_features_check(skb, features);
return vxlan_features_check(skb, features);
}
#endif
int value, unsigned int nsdelay)
{
struct gpio_desc *gpio = *desc;
+ enum gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH;
int res;
- gpio = devm_gpiod_get_index(dev, name, index);
- if (IS_ERR(gpio)) {
- if (PTR_ERR(gpio) == -ENOENT) {
- *desc = NULL;
- return 0;
- }
-
+ gpio = devm_gpiod_get_index_optional(dev, name, index, flags);
+ if (IS_ERR(gpio))
return PTR_ERR(gpio);
+
+ if (gpio) {
+ if (nsdelay)
+ usleep_range(nsdelay, 2 * nsdelay);
+ gpiod_set_value_cansleep(gpio, value);
}
- res = gpiod_direction_output(gpio, !value);
- if (res) {
- dev_err(dev, "unable to toggle gpio %s: %i\n", name, res);
- devm_gpiod_put(dev, gpio);
- gpio = NULL;
- return res;
- }
- if (nsdelay)
- usleep_range(nsdelay, 2 * nsdelay);
- gpiod_set_value_cansleep(gpio, value);
*desc = gpio;
return 0;
struct hv_device *device;
bool is_data_pkt;
+ bool xmit_more; /* from skb */
u16 vlan_tci;
u16 q_idx;
#define VRSS_SEND_TAB_SIZE 16
-/* Per netvsc channel-specific */
+#define RNDIS_MAX_PKT_DEFAULT 8
+#define RNDIS_PKT_ALIGN_DEFAULT 8
+
+struct multi_send_data {
+ spinlock_t lock; /* protect struct multi_send_data */
+ struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
+ u32 count; /* counter of batched packets */
+};
+
+/* Per netvsc device */
struct netvsc_device {
struct hv_device *dev;
unsigned char *cb_buffer;
/* The sub channel callback buffer */
unsigned char *sub_cb_buf;
+
+ struct multi_send_data msd[NR_CPUS];
+ u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
+ u32 pkt_align; /* alignment bytes, e.g. 8 */
};
/* NdisInitialize message */
{
struct netvsc_device *net_device;
struct net_device *ndev = hv_get_drvdata(device);
+ int i;
net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
if (!net_device)
net_device->destroy = false;
net_device->dev = device;
net_device->ndev = ndev;
+ net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
+ net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
+ for (i = 0; i < num_online_cpus(); i++)
+ spin_lock_init(&net_device->msd[i].lock);
hv_set_drvdata(device, net_device);
return net_device;
static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
unsigned int section_index,
+ u32 pend_size,
struct hv_netvsc_packet *packet)
{
char *start = net_device->send_buf;
- char *dest = (start + (section_index * net_device->send_section_size));
+ char *dest = start + (section_index * net_device->send_section_size)
+ + pend_size;
int i;
u32 msg_size = 0;
+ u32 padding = 0;
+ u32 remain = packet->total_data_buflen % net_device->pkt_align;
+
+ /* Add padding */
+ if (packet->is_data_pkt && packet->xmit_more && remain) {
+ padding = net_device->pkt_align - remain;
+ packet->rndis_msg->msg_len += padding;
+ packet->total_data_buflen += padding;
+ }
for (i = 0; i < packet->page_buf_cnt; i++) {
char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
msg_size += len;
dest += len;
}
+
+ if (padding) {
+ memset(dest, 0, padding);
+ msg_size += padding;
+ }
+
return msg_size;
}
-int netvsc_send(struct hv_device *device,
- struct hv_netvsc_packet *packet)
+static inline int netvsc_send_pkt(
+ struct hv_netvsc_packet *packet,
+ struct netvsc_device *net_device)
{
- struct netvsc_device *net_device;
- int ret = 0;
- struct nvsp_message sendMessage;
- struct net_device *ndev;
- struct vmbus_channel *out_channel = NULL;
- u64 req_id;
- unsigned int section_index = NETVSC_INVALID_INDEX;
- u32 msg_size = 0;
- struct sk_buff *skb = NULL;
+ struct nvsp_message nvmsg;
+ struct vmbus_channel *out_channel = packet->channel;
u16 q_idx = packet->q_idx;
+ struct net_device *ndev = net_device->ndev;
+ u64 req_id;
+ int ret;
-
- net_device = get_outbound_net_device(device);
- if (!net_device)
- return -ENODEV;
- ndev = net_device->ndev;
-
- sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
+ nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (packet->is_data_pkt) {
/* 0 is RMC_DATA; */
- sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
+ nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
} else {
/* 1 is RMC_CONTROL; */
- sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
+ nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
}
- /* Attempt to send via sendbuf */
- if (packet->total_data_buflen < net_device->send_section_size) {
- section_index = netvsc_get_next_send_section(net_device);
- if (section_index != NETVSC_INVALID_INDEX) {
- msg_size = netvsc_copy_to_send_buf(net_device,
- section_index,
- packet);
- skb = (struct sk_buff *)
- (unsigned long)packet->send_completion_tid;
- packet->page_buf_cnt = 0;
- }
- }
- packet->send_buf_index = section_index;
-
-
- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- section_index;
- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
+ nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
+ packet->send_buf_index;
+ if (packet->send_buf_index == NETVSC_INVALID_INDEX)
+ nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ else
+ nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
+ packet->total_data_buflen;
if (packet->send_completion)
req_id = (ulong)packet;
else
req_id = 0;
- out_channel = net_device->chn_table[packet->q_idx];
- if (out_channel == NULL)
- out_channel = device->channel;
- packet->channel = out_channel;
-
if (out_channel->rescind)
return -ENODEV;
ret = vmbus_sendpacket_pagebuffer(out_channel,
packet->page_buf,
packet->page_buf_cnt,
- &sendMessage,
+ &nvmsg,
sizeof(struct nvsp_message),
req_id);
} else {
- ret = vmbus_sendpacket(out_channel, &sendMessage,
+ ret = vmbus_sendpacket(
+ out_channel, &nvmsg,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
packet, ret);
}
+ return ret;
+}
+
+int netvsc_send(struct hv_device *device,
+ struct hv_netvsc_packet *packet)
+{
+ struct netvsc_device *net_device;
+ int ret = 0, m_ret = 0;
+ struct vmbus_channel *out_channel;
+ u16 q_idx = packet->q_idx;
+ u32 pktlen = packet->total_data_buflen, msd_len = 0;
+ unsigned int section_index = NETVSC_INVALID_INDEX;
+ struct sk_buff *skb = NULL;
+ unsigned long flag;
+ struct multi_send_data *msdp;
+ struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+
+ net_device = get_outbound_net_device(device);
+ if (!net_device)
+ return -ENODEV;
+
+ out_channel = net_device->chn_table[q_idx];
+ if (!out_channel) {
+ out_channel = device->channel;
+ q_idx = 0;
+ packet->q_idx = 0;
+ }
+ packet->channel = out_channel;
+ packet->send_buf_index = NETVSC_INVALID_INDEX;
+
+ msdp = &net_device->msd[q_idx];
+
+ /* batch packets in send buffer if possible */
+ spin_lock_irqsave(&msdp->lock, flag);
+ if (msdp->pkt)
+ msd_len = msdp->pkt->total_data_buflen;
+
+ if (packet->is_data_pkt && msd_len > 0 &&
+ msdp->count < net_device->max_pkt &&
+ msd_len + pktlen + net_device->pkt_align <
+ net_device->send_section_size) {
+ section_index = msdp->pkt->send_buf_index;
+
+ } else if (packet->is_data_pkt && pktlen + net_device->pkt_align <
+ net_device->send_section_size) {
+ section_index = netvsc_get_next_send_section(net_device);
+ if (section_index != NETVSC_INVALID_INDEX) {
+ msd_send = msdp->pkt;
+ msdp->pkt = NULL;
+ msdp->count = 0;
+ msd_len = 0;
+ }
+ }
+
+ if (section_index != NETVSC_INVALID_INDEX) {
+ netvsc_copy_to_send_buf(net_device,
+ section_index, msd_len,
+ packet);
+ skb = (struct sk_buff *)
+ (unsigned long)packet->send_completion_tid;
+
+ packet->page_buf_cnt = 0;
+ packet->send_buf_index = section_index;
+ packet->total_data_buflen += msd_len;
+
+ kfree(msdp->pkt);
+ if (packet->xmit_more) {
+ msdp->pkt = packet;
+ msdp->count++;
+ } else {
+ cur_send = packet;
+ msdp->pkt = NULL;
+ msdp->count = 0;
+ }
+ } else {
+ msd_send = msdp->pkt;
+ msdp->pkt = NULL;
+ msdp->count = 0;
+ cur_send = packet;
+ }
+
+ spin_unlock_irqrestore(&msdp->lock, flag);
+
+ if (msd_send) {
+ m_ret = netvsc_send_pkt(msd_send, net_device);
+
+ if (m_ret != 0) {
+ netvsc_free_send_slot(net_device,
+ msd_send->send_buf_index);
+ kfree(msd_send);
+ }
+ }
+
+ if (cur_send)
+ ret = netvsc_send_pkt(cur_send, net_device);
+
if (ret != 0) {
if (section_index != NETVSC_INVALID_INDEX)
netvsc_free_send_slot(net_device, section_index);
return NETDEV_TX_OK;
}
+ packet->xmit_more = skb->xmit_more;
+
packet->vlan_tci = skb->vlan_tci;
packet->q_idx = skb_get_queue_mapping(skb);
}
packet->send_completion = NULL;
+ packet->xmit_more = false;
ret = netvsc_send(dev->net_dev->dev, packet);
return ret;
u32 status;
int ret;
unsigned long t;
+ struct netvsc_device *nvdev = dev->net_dev;
request = get_rndis_request(dev, RNDIS_MSG_INIT,
RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
status = init_complete->status;
if (status == RNDIS_STATUS_SUCCESS) {
dev->state = RNDIS_DEV_INITIALIZED;
+ nvdev->max_pkt = init_complete->max_pkt_per_msg;
+ nvdev->pkt_align = 1 << init_complete->pkt_alignment_factor;
ret = 0;
} else {
dev->state = RNDIS_DEV_UNINITIALIZED;
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
- vmbus_are_subchannels_present(dev->channel);
-
ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
out:
.ndo_change_carrier = team_change_carrier,
.ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink,
.ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink,
+ .ndo_features_check = passthru_features_check,
};
/***********************
{
struct receive_queue *rq =
container_of(napi, struct receive_queue, napi);
- unsigned int r, received = 0;
+ unsigned int r, received;
- received += virtnet_receive(rq, budget - received);
+ received = virtnet_receive(rq, budget);
/* Out of packets? */
if (received < budget) {
ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */
ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */
+ ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */
};
skb->protocol = htons(ETH_P_802_2);
}
+/**
+ * skb_vlan_tagged - check if skb is vlan tagged.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged, regardless of whether it is hardware
+ * accelerated or not.
+ */
+static inline bool skb_vlan_tagged(const struct sk_buff *skb)
+{
+ if (!skb_vlan_tag_present(skb) &&
+ likely(skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)))
+ return false;
+
+ return true;
+}
+
+/**
+ * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged with multiple vlan headers, regardless
+ * of whether it is hardware accelerated or not.
+ */
+static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
+{
+ __be16 protocol = skb->protocol;
+
+ if (!skb_vlan_tag_present(skb)) {
+ struct vlan_ethhdr *veh;
+
+ if (likely(protocol != htons(ETH_P_8021Q) &&
+ protocol != htons(ETH_P_8021AD)))
+ return false;
+
+ veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ }
+
+ if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+ return false;
+
+ return true;
+}
+
+/**
+ * vlan_features_check - drop unsafe features for skb with multiple tags.
+ * @skb: skbuff to query
+ * @features: features to be checked
+ *
+ * Returns features without unsafe ones if the skb has multiple tags.
+ */
+static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (skb_vlan_tagged_multi(skb))
+ features = netdev_intersect_features(features,
+ NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
+
+ return features;
+}
+
#endif /* !(_LINUX_IF_VLAN_H_) */
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
struct net_device *dev);
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features);
netdev_features_t netif_skb_features(struct sk_buff *skb);
static inline bool net_gso_ok(netdev_features_t features, int gso_type)
};
typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
const void *obj);
const char *ptr = rht_obj(ht, he);
return likely(params.obj_hashfn) ?
- rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) :
+ rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+ ht->p.key_len,
+ tbl->hash_rnd)) :
rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
}
u32 br_port_mask);
int (*port_stp_update)(struct dsa_switch *ds, int port,
u8 state);
+ int (*fdb_add)(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
+ int (*fdb_del)(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
+ int (*fdb_getnext)(struct dsa_switch *ds, int port,
+ unsigned char *addr, bool *is_static);
};
void register_switch_driver(struct dsa_switch_driver *type);
/**
* struct nft_set_elem - generic representation of set elements
*
- * @cookie: implementation specific element cookie
* @key: element key
- * @data: element data (maps only)
- * @flags: element flags (end of interval)
- *
- * The cookie can be used to store a handle to the element for subsequent
- * removal.
+ * @priv: element private data and extensions
*/
struct nft_set_elem {
- void *cookie;
struct nft_data key;
- struct nft_data data;
- u32 flags;
+ void *priv;
};
struct nft_set;
enum nft_set_class class;
};
+struct nft_set_ext;
+
/**
* struct nft_set_ops - nf_tables set operations
*
* @lookup: look up an element within the set
* @insert: insert new element into set
+ * @activate: activate new element in the next generation
+ * @deactivate: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
* @privsize: function to return size of set private data
* @destroy: destroy private data of set instance
* @list: nf_tables_set_ops list node
* @owner: module reference
+ * @elemsize: element private size
* @features: features supported by the implementation
*/
struct nft_set_ops {
bool (*lookup)(const struct nft_set *set,
const struct nft_data *key,
- struct nft_data *data);
- int (*get)(const struct nft_set *set,
- struct nft_set_elem *elem);
+ const struct nft_set_ext **ext);
int (*insert)(const struct nft_set *set,
const struct nft_set_elem *elem);
+ void (*activate)(const struct nft_set *set,
+ const struct nft_set_elem *elem);
+ void * (*deactivate)(const struct nft_set *set,
+ const struct nft_set_elem *elem);
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
struct list_head list;
struct module *owner;
+ unsigned int elemsize;
u32 features;
};
* @nelems: number of elements
* @policy: set parameterization (see enum nft_set_policies)
* @ops: set ops
+ * @pnet: network namespace
* @flags: set flags
* @klen: key length
* @dlen: data length
u16 policy;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
+ possible_net_t pnet;
u16 flags;
u8 klen;
u8 dlen;
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
+/**
+ * enum nft_set_extensions - set extension type IDs
+ *
+ * @NFT_SET_EXT_KEY: element key
+ * @NFT_SET_EXT_DATA: mapping data
+ * @NFT_SET_EXT_FLAGS: element flags
+ * @NFT_SET_EXT_NUM: number of extension types
+ */
+enum nft_set_extensions {
+ NFT_SET_EXT_KEY,
+ NFT_SET_EXT_DATA,
+ NFT_SET_EXT_FLAGS,
+ NFT_SET_EXT_NUM
+};
+
+/**
+ * struct nft_set_ext_type - set extension type
+ *
+ * @len: fixed part length of the extension
+ * @align: alignment requirements of the extension
+ */
+struct nft_set_ext_type {
+ u8 len;
+ u8 align;
+};
+
+extern const struct nft_set_ext_type nft_set_ext_types[];
+
+/**
+ * struct nft_set_ext_tmpl - set extension template
+ *
+ * @len: length of extension area
+ * @offset: offsets of individual extension types
+ */
+struct nft_set_ext_tmpl {
+ u16 len;
+ u8 offset[NFT_SET_EXT_NUM];
+};
+
+/**
+ * struct nft_set_ext - set extensions
+ *
+ * @genmask: generation mask
+ * @offset: offsets of individual extension types
+ * @data: beginning of extension data
+ */
+struct nft_set_ext {
+ u8 genmask;
+ u8 offset[NFT_SET_EXT_NUM];
+ char data[0];
+};
+
+static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
+{
+ memset(tmpl, 0, sizeof(*tmpl));
+ tmpl->len = sizeof(struct nft_set_ext);
+}
+
+static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+ unsigned int len)
+{
+ tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align);
+ BUG_ON(tmpl->len > U8_MAX);
+ tmpl->offset[id] = tmpl->len;
+ tmpl->len += nft_set_ext_types[id].len + len;
+}
+
+static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+{
+ nft_set_ext_add_length(tmpl, id, 0);
+}
+
+static inline void nft_set_ext_init(struct nft_set_ext *ext,
+ const struct nft_set_ext_tmpl *tmpl)
+{
+ memcpy(ext->offset, tmpl->offset, sizeof(ext->offset));
+}
+
+static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+ return !!ext->offset[id];
+}
+
+static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+ return ext && __nft_set_ext_exists(ext, id);
+}
+
+static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id)
+{
+ return (void *)ext + ext->offset[id];
+}
+
+static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_KEY);
+}
+
+static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_DATA);
+}
+
+static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
+}
+
+static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
+ void *elem)
+{
+ return elem + set->ops->elemsize;
+}
+
+void nft_set_elem_destroy(const struct nft_set *set, void *elem);
/**
* struct nft_expr_type - nf_tables expression type
*
* @rules: list of rules in the chain
* @list: used internally
- * @net: net namespace that this chain belongs to
* @table: table that this chain belongs to
* @handle: chain handle
* @use: number of jump references to this chain
struct nft_chain {
struct list_head rules;
struct list_head list;
- struct net *net;
struct nft_table *table;
u64 handle;
u32 use;
* struct nft_base_chain - nf_tables base chain
*
* @ops: netfilter hook ops
+ * @pnet: net namespace that this chain belongs to
* @type: chain type
* @policy: default policy
* @stats: per-cpu chain stats
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
+ possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
struct nft_stats __percpu *stats;
#define MODULE_ALIAS_NFT_SET() \
MODULE_ALIAS("nft-set")
+/*
+ * The gencursor defines two generations, the currently active and the
+ * next one. Objects contain a bitmask of 2 bits specifying the generations
+ * they're active in. A set bit means they're inactive in the generation
+ * represented by that bit.
+ *
+ * New objects start out as inactive in the current and active in the
+ * next generation. When committing the ruleset the bitmask is cleared,
+ * meaning they're active in all generations. When removing an object,
+ * it is set inactive in the next generation. After committing the ruleset,
+ * the objects are removed.
+ */
+static inline unsigned int nft_gencursor_next(const struct net *net)
+{
+ return net->nft.gencursor + 1 == 1 ? 1 : 0;
+}
+
+static inline u8 nft_genmask_next(const struct net *net)
+{
+ return 1 << nft_gencursor_next(net);
+}
+
+static inline u8 nft_genmask_cur(const struct net *net)
+{
+ /* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
+ return 1 << ACCESS_ONCE(net->nft.gencursor);
+}
+
+/*
+ * Set element transaction helpers
+ */
+
+static inline bool nft_set_elem_active(const struct nft_set_ext *ext,
+ u8 genmask)
+{
+ return !(ext->genmask & genmask);
+}
+
+static inline void nft_set_elem_change_active(const struct nft_set *set,
+ struct nft_set_ext *ext)
+{
+ ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
+}
+
/**
* struct nft_trans - nf_tables object update in transaction
*
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *);
-bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb,
- const char *proto);
void tcp_push_one(struct sock *, unsigned int mss_now);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+ BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
__BPF_FUNC_MAX_ID,
};
expected_type = CONST_IMM;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
+ } else if (arg_type == ARG_PTR_TO_CTX) {
+ expected_type = PTR_TO_CTX;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
* struct rhash_head node;
* };
*
- * u32 my_hash_fn(const void *data, u32 seed)
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
* {
* struct test_obj *obj = data;
*
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
+ dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
/* ipv6 shared card related stuff */
dev->dev_id = real_dev->dev_id;
return features;
}
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ return features;
+}
+EXPORT_SYMBOL(passthru_features_check);
+
+static netdev_features_t dflt_features_check(const struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ return vlan_features_check(skb, features);
+}
+
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
- __be16 protocol = skb->protocol;
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
if (skb->encapsulation)
features &= dev->hw_enc_features;
- if (!skb_vlan_tag_present(skb)) {
- if (unlikely(protocol == htons(ETH_P_8021Q) ||
- protocol == htons(ETH_P_8021AD))) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else {
- goto finalize;
- }
- }
-
- features = netdev_intersect_features(features,
- dev->vlan_features |
- NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX);
-
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+ if (skb_vlan_tagged(skb))
features = netdev_intersect_features(features,
- NETIF_F_SG |
- NETIF_F_HIGHDMA |
- NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM |
+ dev->vlan_features |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
-finalize:
if (dev->netdev_ops->ndo_features_check)
features &= dev->netdev_ops->ndo_features_check(skb, dev,
features);
+ else
+ features &= dflt_features_check(skb, dev, features);
return harmonize_features(skb, features);
}
return 0;
}
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ unsigned int offset = (unsigned int) r2;
+ void *from = (void *) (long) r3;
+ unsigned int len = (unsigned int) r4;
+ char buf[16];
+ void *ptr;
+
+ /* bpf verifier guarantees that:
+ * 'from' pointer points to bpf program stack
+ * 'len' bytes of it were initialized
+ * 'len' > 0
+ * 'skb' is a valid pointer to 'struct sk_buff'
+ *
+ * so check for invalid 'offset' and too large 'len'
+ */
+ if (offset > 0xffff || len > sizeof(buf))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, len, buf);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ skb_postpull_rcsum(skb, ptr, len);
+
+ memcpy(ptr, from, len);
+
+ if (ptr == buf)
+ /* skb_store_bits cannot return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, len);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+ .func = bpf_skb_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_STACK,
+ .arg4_type = ARG_CONST_STACK_SIZE,
+};
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
}
}
+static const struct bpf_func_proto *
+tc_cls_act_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_store_bytes:
+ return &bpf_skb_store_bytes_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
.convert_ctx_access = sk_filter_convert_ctx_access,
};
+static const struct bpf_verifier_ops tc_cls_act_ops = {
+ .get_func_proto = tc_cls_act_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};
static struct bpf_prog_type_list sched_cls_type __read_mostly = {
- .ops = &sk_filter_ops,
+ .ops = &tc_cls_act_ops,
.type = BPF_PROG_TYPE_SCHED_CLS,
};
static struct bpf_prog_type_list sched_act_type __read_mostly = {
- .ops = &sk_filter_ops,
+ .ops = &tc_cls_act_ops,
.type = BPF_PROG_TYPE_SCHED_ACT,
};
return 0;
}
+static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid, u16 nlm_flags)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (ds->drv->fdb_add)
+ ret = ds->drv->fdb_add(ds, p->port, addr, vid);
+
+ return ret;
+}
+
+static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (ds->drv->fdb_del)
+ ret = ds->drv->fdb_del(ds, p->port, addr, vid);
+
+ return ret;
+}
+
+static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
+ const unsigned char *addr, u16 vid,
+ bool is_static,
+ u32 portid, u32 seq, int type,
+ unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
+ ndm->ndm_flags = NTF_EXT_LEARNED;
+ ndm->ndm_type = 0;
+ ndm->ndm_ifindex = dev->ifindex;
+ ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+ if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
+ goto nla_put_failure;
+
+ if (vid && nla_put_u16(skb, NDA_VLAN, vid))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+/* Dump information about entries, in response to GETNEIGH */
+static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev,
+ struct net_device *filter_dev, int idx)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ unsigned char addr[ETH_ALEN] = { 0 };
+ int ret;
+
+ if (!ds->drv->fdb_getnext)
+ return -EOPNOTSUPP;
+
+ for (; ; idx++) {
+ bool is_static;
+
+ ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static);
+ if (ret < 0)
+ break;
+
+ if (idx < cb->args[0])
+ continue;
+
+ ret = dsa_slave_fill_info(dev, skb, addr, 0,
+ is_static,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, NLM_F_MULTI);
+ if (ret < 0)
+ break;
+ }
+
+ return idx;
+}
+
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
+ .ndo_fdb_add = dsa_slave_fdb_add,
+ .ndo_fdb_del = dsa_slave_fdb_del,
+ .ndo_fdb_dump = dsa_slave_fdb_dump,
.ndo_do_ioctl = dsa_slave_ioctl,
};
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
- .level = 5,
+ .level = LOGLEVEL_NOTICE,
.logflags = NF_LOG_MASK,
},
},
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
- .level = 5,
+ .level = LOGLEVEL_NOTICE,
.logflags = NF_LOG_MASK,
},
},
}
EXPORT_SYMBOL(inet_reqsk_alloc);
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+ const struct sk_buff *skb,
+ const char *proto)
+{
+ const char *msg = "Dropping request";
+ bool want_cookie = false;
+ struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies) {
+ msg = "Sending cookies";
+ want_cookie = true;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+ } else
+#endif
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+ lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+ if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+ lopt->synflood_warned = 1;
+ pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+ proto, ntohs(tcp_hdr(skb)->dest), msg);
+ }
+ return want_cookie;
+}
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
kfree(inet_rsk(req)->opt);
}
-/*
- * Return true if a syncookie should be sent
- */
-bool tcp_syn_flood_action(struct sock *sk,
- const struct sk_buff *skb,
- const char *proto)
-{
- const char *msg = "Dropping request";
- bool want_cookie = false;
- struct listen_sock *lopt;
-
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
- msg = "Sending cookies";
- want_cookie = true;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
- } else
-#endif
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
-
- lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
- if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
- lopt->synflood_warned = 1;
- pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
- proto, ntohs(tcp_hdr(skb)->dest), msg);
- }
- return want_cookie;
-}
-EXPORT_SYMBOL(tcp_syn_flood_action);
#ifdef CONFIG_TCP_MD5SIG
/*
ops = fib_rules_register(&fib6_rules_ops_template, net);
if (IS_ERR(ops))
return PTR_ERR(ops);
- net->ipv6.fib6_rules_ops = ops;
-
- err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
- RT6_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
if (err)
goto out_fib6_rules_ops;
- err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
- 0x7FFE, RT6_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
if (err)
goto out_fib6_rules_ops;
+ net->ipv6.fib6_rules_ops = ops;
out:
return err;
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
#include <linux/capability.h>
#include <linux/in.h>
#include <linux/skbuff.h>
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
- .level = 4,
+ .level = LOGLEVEL_WARNING,
.logflags = NF_LOG_MASK,
},
},
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
- .level = 5,
+ .level = LOGLEVEL_NOTICE,
.logflags = NF_LOG_MASK,
},
},
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
- depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
help
static inline bool
nft_rule_is_active(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
- return net->nft.gencursor+1 == 1 ? 1 : 0;
+ return (rule->genmask & nft_genmask_cur(net)) == 0;
}
static inline int
nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << gencursor_next(net))) == 0;
+ return (rule->genmask & nft_genmask_next(net)) == 0;
}
static inline void
nft_rule_activate_next(struct net *net, struct nft_rule *rule)
{
/* Now inactive, will be active in the future */
- rule->genmask = (1 << net->nft.gencursor);
+ rule->genmask = nft_genmask_cur(net);
}
static inline void
nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
{
- rule->genmask = (1 << gencursor_next(net));
+ rule->genmask = nft_genmask_next(net);
}
static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
{
- rule->genmask &= ~(1 << gencursor_next(net));
+ rule->genmask &= ~nft_genmask_next(net);
}
static int
rcu_assign_pointer(basechain->stats, stats);
}
+ write_pnet(&basechain->pnet, net);
basechain->type = type;
chain = &basechain->chain;
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
- chain->net = net;
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
goto err2;
INIT_LIST_HEAD(&set->bindings);
+ write_pnet(&set->pnet, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
- return nft_validate_data_load(ctx, dreg, &elem->data,
+ return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext),
set->dtype == NFT_DATA_VERDICT ?
NFT_DATA_VERDICT : NFT_DATA_VALUE);
}
nf_tables_set_destroy(ctx, set);
}
+const struct nft_set_ext_type nft_set_ext_types[] = {
+ [NFT_SET_EXT_KEY] = {
+ .len = sizeof(struct nft_data),
+ .align = __alignof__(struct nft_data),
+ },
+ [NFT_SET_EXT_DATA] = {
+ .len = sizeof(struct nft_data),
+ .align = __alignof__(struct nft_data),
+ },
+ [NFT_SET_EXT_FLAGS] = {
+ .len = sizeof(u8),
+ .align = __alignof__(u8),
+ },
+};
+EXPORT_SYMBOL_GPL(nft_set_ext_types);
+
/*
* Set elements
*/
const struct nft_set *set,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
if (nest == NULL)
goto nla_put_failure;
- if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
- set->klen) < 0)
+ if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+ NFT_DATA_VALUE, set->klen) < 0)
goto nla_put_failure;
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
- nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
set->dlen) < 0)
goto nla_put_failure;
- if (elem->flags != 0)
- if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
- goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+ htonl(*nft_set_ext_flags(ext))))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
return trans;
}
+static void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const struct nft_data *key,
+ const struct nft_data *data,
+ gfp_t gfp)
+{
+ struct nft_set_ext *ext;
+ void *elem;
+
+ elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+ if (elem == NULL)
+ return NULL;
+
+ ext = nft_set_elem_ext(set, elem);
+ nft_set_ext_init(ext, tmpl);
+
+ memcpy(nft_set_ext_key(ext), key, set->klen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ memcpy(nft_set_ext_data(ext), data, set->dlen);
+
+ return elem;
+}
+
+void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+ nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+
+ kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u32 flags;
int err;
if (set->size && set->nelems == set->size)
if (nla[NFTA_SET_ELEM_KEY] == NULL)
return -EINVAL;
- elem.flags = 0;
+ nft_set_ext_prepare(&tmpl);
+
+ flags = 0;
if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
- elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
- if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+ if (flags & ~NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
if (!(set->flags & NFT_SET_INTERVAL) &&
- elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
}
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
- !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
if (nla[NFTA_SET_ELEM_DATA] != NULL &&
- elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
goto err2;
- err = -EEXIST;
- if (set->ops->get(set, &elem) == 0)
- goto err2;
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+ err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err2;
};
err = nft_validate_data_load(&bind_ctx, dreg,
- &elem.data, d2.type);
+ &data, d2.type);
if (err < 0)
goto err3;
}
+
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
}
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+ if (elem.priv == NULL)
+ goto err3;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
- goto err3;
+ goto err4;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err4;
+ goto err5;
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
-err4:
+err5:
kfree(trans);
+err4:
+ kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
- nft_data_uninit(&elem.data, d2.type);
+ nft_data_uninit(&data, d2.type);
err2:
nft_data_uninit(&elem.key, d1.type);
err1:
if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
goto err2;
- err = set->ops->get(set, &elem);
- if (err < 0)
- goto err2;
-
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
+ elem.priv = set->ops->deactivate(set, &elem);
+ if (elem.priv == NULL) {
+ err = -ENOENT;
+ goto err3;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+
+err3:
+ kfree(trans);
err2:
nft_data_uninit(&elem.key, desc.type);
err1:
case NFT_MSG_DELSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_DELSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
while (++net->nft.base_seq == 0);
/* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
+ net->nft.gencursor = nft_gencursor_next(net);
/* Make sure all packets have left the previous generation before
* purging old rules.
NFT_MSG_DELSET, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
- nf_tables_setelem_notify(&trans->ctx,
- nft_trans_elem_set(trans),
- &nft_trans_elem(trans),
+ te = (struct nft_trans_elem *)trans->data;
+
+ te->set->ops->activate(te->set, &te->elem);
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
NFT_MSG_NEWSETELEM, 0);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
+
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
break;
}
}
case NFT_MSG_NEWSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_NEWSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
case NFT_MSG_NEWSETELEM:
nft_trans_elem_set(trans)->nelems--;
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
+
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
+ te = (struct nft_trans_elem *)trans->data;
+
nft_trans_elem_set(trans)->nelems++;
+ te->set->ops->activate(te->set, &te->elem);
+
nft_trans_destroy(trans);
break;
}
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
- if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_data *data;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
- switch (elem->data.verdict) {
+ data = nft_set_ext_data(ext);
+ switch (data->verdict) {
case NFT_JUMP:
case NFT_GOTO:
- return nf_tables_check_loops(ctx, elem->data.chain);
+ return nf_tables_check_loops(ctx, data->chain);
default:
return 0;
}
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
- .level = 4,
+ .level = LOGLEVEL_WARNING,
.logflags = NF_LOG_MASK,
},
},
{
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
+ nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
}
static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
+ const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_data data[NFT_REG_MAX + 1];
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
struct nft_stats *stats;
int rulenum;
- /*
- * Cache cursor to avoid problems in case that the cursor is updated
- * while traversing the ruleset.
- */
- unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+ unsigned int gencursor = nft_genmask_cur(net);
do_chain:
rulenum = 0;
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_HASH_ELEMENT_HINT 3
+struct nft_hash {
+ struct rhashtable ht;
+};
+
struct nft_hash_elem {
struct rhash_head node;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
+};
+
+struct nft_hash_cmp_arg {
+ const struct nft_set *set;
+ const struct nft_data *key;
+ u8 genmask;
};
static const struct rhashtable_params nft_hash_params;
-static bool nft_hash_lookup(const struct nft_set *set,
- const struct nft_data *key,
- struct nft_data *data)
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
-
- he = rhashtable_lookup_fast(priv, key, nft_hash_params);
- if (he && set->flags & NFT_SET_MAP)
- nft_data_copy(data, he->data);
+ const struct nft_hash_cmp_arg *arg = data;
- return !!he;
+ return jhash(arg->key, len, seed);
}
-static int nft_hash_insert(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
{
- struct rhashtable *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- unsigned int size;
- int err;
+ const struct nft_hash_elem *he = data;
- if (elem->flags != 0)
- return -EINVAL;
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
- size = sizeof(*he);
- if (set->flags & NFT_SET_MAP)
- size += sizeof(he->data[0]);
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_hash_cmp_arg *x = arg->key;
+ const struct nft_hash_elem *he = ptr;
- he = kzalloc(size, GFP_KERNEL);
- if (he == NULL)
- return -ENOMEM;
+ if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
- nft_data_copy(&he->key, &elem->key);
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(he->data, &elem->data);
+static bool nft_hash_lookup(const struct nft_set *set,
+ const struct nft_data *key,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(read_pnet(&set->pnet)),
+ .set = set,
+ .key = key,
+ };
- err = rhashtable_insert_fast(priv, &he->node, nft_hash_params);
- if (err)
- kfree(he);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ *ext = &he->ext;
- return err;
+ return !!he;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static int nft_hash_insert(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = &elem->key,
+ };
+
+ return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
}
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
- rhashtable_remove_fast(priv, elem->cookie, nft_hash_params);
- synchronize_rcu();
- kfree(elem->cookie);
+ nft_set_elem_change_active(set, &he->ext);
}
-static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void *nft_hash_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = &elem->key,
+ };
- he = rhashtable_lookup_fast(priv, &elem->key, nft_hash_params);
- if (!he)
- return -ENOENT;
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ nft_set_elem_change_active(set, &he->ext);
- elem->cookie = he;
- elem->flags = 0;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(&elem->data, he->data);
+ return he;
+}
- return 0;
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
}
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int err;
- err = rhashtable_walk_init(priv, &hti);
+ err = rhashtable_walk_init(&priv->ht, &hti);
iter->err = err;
if (err)
return;
if (iter->count < iter->skip)
goto cont;
+ if (!nft_set_elem_active(&he->ext, genmask))
+ goto cont;
- memcpy(&elem.key, &he->key, sizeof(elem.key));
- if (set->flags & NFT_SET_MAP)
- memcpy(&elem.data, he->data, sizeof(elem.data));
- elem.flags = 0;
+ elem.priv = he;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct rhashtable);
+ return sizeof(struct nft_hash);
}
static const struct rhashtable_params nft_hash_params = {
- .head_offset = offsetof(struct nft_hash_elem, node),
- .key_offset = offsetof(struct nft_hash_elem, key),
- .hashfn = jhash,
- .automatic_shrinking = true,
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
};
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct rhashtable_params params = nft_hash_params;
params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
- params.key_len = set->klen;
+ params.key_len = set->klen;
- return rhashtable_init(priv, ¶ms);
+ return rhashtable_init(&priv->ht, ¶ms);
}
-static void nft_free_element(void *ptr, void *arg)
+static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
}
static void nft_hash_destroy(const struct nft_set *set)
{
- rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+ struct nft_hash *priv = nft_set_priv(set);
+
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
(void *)set);
}
unsigned int esize;
esize = sizeof(struct nft_hash_elem);
- if (features & NFT_SET_MAP)
- esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
-
if (desc->size) {
- est->size = sizeof(struct rhashtable) +
+ est->size = sizeof(struct nft_hash) +
roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
- .get = nft_hash_get,
.insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
li->u.log.level =
ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
} else {
- li->u.log.level = 4;
+ li->u.log.level = LOGLEVEL_WARNING;
}
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
- if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+ if (set->ops->lookup(set, &data[priv->sreg], &ext)) {
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext));
return;
+ }
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
}
break;
case NFT_META_CPU:
- dest->data[0] = smp_processor_id();
+ dest->data[0] = raw_smp_processor_id();
break;
case NFT_META_IIFGROUP:
if (in == NULL)
struct nft_rbtree_elem {
struct rb_node node;
- u16 flags;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
};
+
static bool nft_rbtree_lookup(const struct nft_set *set,
const struct nft_data *key,
- struct nft_data *data)
+ const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
const struct rb_node *parent;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, key, set->klen);
+ d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = parent->rb_left;
interval = rbe;
parent = parent->rb_right;
else {
found:
- if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(&rbe->ext) &
+ NFT_SET_ELEM_INTERVAL_END)
goto out;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(data, rbe->data);
-
spin_unlock_bh(&nft_rbtree_lock);
+
+ *ext = &rbe->ext;
return true;
}
}
return false;
}
-static void nft_rbtree_elem_destroy(const struct nft_set *set,
- struct nft_rbtree_elem *rbe)
-{
- nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(rbe->data, set->dtype);
-
- kfree(rbe);
-}
-
static int __nft_rbtree_insert(const struct nft_set *set,
struct nft_rbtree_elem *new)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
parent = NULL;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+ d = nft_data_cmp(nft_set_ext_key(&rbe->ext),
+ nft_set_ext_key(&new->ext),
+ set->klen);
if (d < 0)
p = &parent->rb_left;
else if (d > 0)
p = &parent->rb_right;
- else
- return -EEXIST;
+ else {
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ return -EEXIST;
+ p = &parent->rb_left;
+ }
}
rb_link_node(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
static int nft_rbtree_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_rbtree_elem *rbe;
- unsigned int size;
+ struct nft_rbtree_elem *rbe = elem->priv;
int err;
- size = sizeof(*rbe);
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
- size += sizeof(rbe->data[0]);
-
- rbe = kzalloc(size, GFP_KERNEL);
- if (rbe == NULL)
- return -ENOMEM;
-
- rbe->flags = elem->flags;
- nft_data_copy(&rbe->key, &elem->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(rbe->data, &elem->data);
-
spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
- if (err < 0)
- kfree(rbe);
-
spin_unlock_bh(&nft_rbtree_lock);
+
return err;
}
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->cookie;
+ struct nft_rbtree_elem *rbe = elem->priv;
spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
spin_unlock_bh(&nft_rbtree_lock);
- kfree(rbe);
}
-static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_rbtree_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+ d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key,
+ set->klen);
if (d < 0)
parent = parent->rb_left;
else if (d > 0)
parent = parent->rb_right;
else {
- elem->cookie = rbe;
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem->data, rbe->data);
- elem->flags = rbe->flags;
- return 0;
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ nft_set_elem_change_active(set, &rbe->ext);
+ return rbe;
}
}
- return -ENOENT;
+ return NULL;
}
static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_iter *iter)
{
const struct nft_rbtree *priv = nft_set_priv(set);
- const struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
if (iter->count < iter->skip)
goto cont;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ goto cont;
- rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_data_copy(&elem.key, &rbe->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem.data, rbe->data);
- elem.flags = rbe->flags;
+ elem.priv = rbe;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_rbtree_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe);
}
}
unsigned int nsize;
nsize = sizeof(struct nft_rbtree_elem);
- if (features & NFT_SET_MAP)
- nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
-
if (desc->size)
est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
else
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
- .get = nft_rbtree_get,
+ .deactivate = nft_rbtree_deactivate,
+ .activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.features = NFT_SET_INTERVAL | NFT_SET_MAP,
.exit = netlink_net_exit,
};
-static inline u32 netlink_hash(const void *data, u32 seed)
+static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
{
const struct netlink_sock *nlk = data;
struct netlink_compare_arg arg;
static void tipc_bclink_unlock(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_node *node = NULL;
- if (likely(!tn->bclink->flags)) {
- spin_unlock_bh(&tn->bclink->lock);
- return;
- }
-
- if (tn->bclink->flags & TIPC_BCLINK_RESET) {
- tn->bclink->flags &= ~TIPC_BCLINK_RESET;
- node = tipc_bclink_retransmit_to(net);
- }
spin_unlock_bh(&tn->bclink->lock);
-
- if (node)
- tipc_link_reset_all(node);
}
void tipc_bclink_input(struct net *net)
return MAX_PKT_DEFAULT_MCAST;
}
-void tipc_bclink_set_flags(struct net *net, unsigned int flags)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- tn->bclink->flags |= flags;
-}
-
static u32 bcbuf_acks(struct sk_buff *buf)
{
return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
seqno : node->bclink.last_sent;
}
-
/**
* tipc_bclink_retransmit_to - get most recent node to request retransmission
*
return;
tipc_node_lock(n_ptr);
-
if (n_ptr->bclink.recv_permitted &&
(n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
(n_ptr->bclink.last_in == msg_bcgap_after(msg)))
n_ptr->bclink.oos_state = 2;
-
tipc_node_unlock(n_ptr);
+ tipc_node_put(n_ptr);
}
/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster
goto unlock;
if (msg_destnode(msg) == tn->own_addr) {
tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
- tipc_node_unlock(node);
tipc_bclink_lock(net);
bcl->stats.recv_nacks++;
tn->bclink->retransmit_to = node;
bclink_retransmit_pkt(tn, msg_bcgap_after(msg),
msg_bcgap_to(msg));
tipc_bclink_unlock(net);
+ tipc_node_unlock(node);
} else {
tipc_node_unlock(node);
bclink_peek_nack(net, msg);
}
+ tipc_node_put(node);
goto exit;
}
unlock:
tipc_node_unlock(node);
+ tipc_node_put(node);
exit:
kfree_skb(buf);
}
struct tipc_bearer *secondary;
};
-#define TIPC_BCLINK_RESET 1
#define BCBEARER MAX_BEARERS
/**
* @lock: spinlock governing access to structure
* @link: (non-standard) broadcast link structure
* @node: (non-standard) node structure representing b'cast link's peer node
- * @flags: represent bclink states
* @bcast_nodes: map of broadcast-capable nodes
* @retransmit_to: node that most recently requested a retransmit
*
spinlock_t lock;
struct tipc_link link;
struct tipc_node node;
- unsigned int flags;
struct sk_buff_head arrvq;
struct sk_buff_head inputq;
struct tipc_node_map bcast_nodes;
int tipc_bclink_init(struct net *net);
void tipc_bclink_stop(struct net *net);
-void tipc_bclink_set_flags(struct net *tn, unsigned int flags);
void tipc_bclink_add_node(struct net *net, u32 addr);
void tipc_bclink_remove_node(struct net *net, u32 addr);
struct tipc_node *tipc_bclink_retransmit_to(struct net *tn);
}
}
tipc_node_unlock(node);
+ tipc_node_put(node);
}
/**
if (link)
rc = __tipc_link_xmit(net, link, list);
tipc_node_unlock(node);
+ tipc_node_put(node);
}
if (link)
return rc;
(unsigned long) TIPC_SKB_CB(buf)->handle);
n_ptr = tipc_bclink_retransmit_to(net);
- tipc_node_lock(n_ptr);
tipc_addr_string_fill(addr_string, n_ptr->addr);
pr_info("Broadcast link info for %s\n", addr_string);
n_ptr->bclink.oos_state,
n_ptr->bclink.last_sent);
- tipc_node_unlock(n_ptr);
-
- tipc_bclink_set_flags(net, TIPC_BCLINK_RESET);
+ n_ptr->action_flags |= TIPC_BCAST_RESET;
l_ptr->stale_count = 0;
}
}
n_ptr = tipc_node_find(net, msg_prevnode(msg));
if (unlikely(!n_ptr))
goto discard;
- tipc_node_lock(n_ptr);
+ tipc_node_lock(n_ptr);
/* Locate unicast link endpoint that should handle message */
l_ptr = n_ptr->links[b_ptr->identity];
if (unlikely(!l_ptr))
skb = NULL;
unlock:
tipc_node_unlock(n_ptr);
+ tipc_node_put(n_ptr);
discard:
if (unlikely(skb))
kfree_skb(skb);
msg.seq = cb->nlh->nlmsg_seq;
rcu_read_lock();
-
if (prev_node) {
node = tipc_node_find(net, prev_node);
if (!node) {
cb->prev_seq = 1;
goto out;
}
+ tipc_node_put(node);
list_for_each_entry_continue_rcu(node, &tn->node_list,
list) {
err = __tipc_nl_add_node_links(net, &msg, node,
&prev_link);
tipc_node_unlock(node);
+ tipc_node_put(node);
if (err)
goto out;
tipc_node_lock(node);
list_add_tail(&publ->nodesub_list, &node->publ_list);
tipc_node_unlock(node);
+ tipc_node_put(node);
}
static void tipc_publ_unsubscribe(struct net *net, struct publication *publ,
tipc_node_lock(node);
list_del_init(&publ->nodesub_list);
tipc_node_unlock(node);
+ tipc_node_put(node);
}
/**
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
+static void tipc_node_delete(struct tipc_node *node);
struct tipc_sock_conn {
u32 port;
return addr & (NODE_HTABLE_SIZE - 1);
}
+static void tipc_node_kref_release(struct kref *kref)
+{
+ struct tipc_node *node = container_of(kref, struct tipc_node, kref);
+
+ tipc_node_delete(node);
+}
+
+void tipc_node_put(struct tipc_node *node)
+{
+ kref_put(&node->kref, tipc_node_kref_release);
+}
+
+static void tipc_node_get(struct tipc_node *node)
+{
+ kref_get(&node->kref);
+}
+
/*
* tipc_node_find - locate specified node object, if it exists
*/
hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
hash) {
if (node->addr == addr) {
+ tipc_node_get(node);
rcu_read_unlock();
return node;
}
}
n_ptr->addr = addr;
n_ptr->net = net;
+ kref_init(&n_ptr->kref);
spin_lock_init(&n_ptr->lock);
INIT_HLIST_NODE(&n_ptr->hash);
INIT_LIST_HEAD(&n_ptr->list);
list_add_tail_rcu(&n_ptr->list, &temp_node->list);
n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
n_ptr->signature = INVALID_NODE_SIG;
+ tipc_node_get(n_ptr);
exit:
spin_unlock_bh(&tn->node_list_lock);
return n_ptr;
}
-static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr)
+static void tipc_node_delete(struct tipc_node *node)
{
- list_del_rcu(&n_ptr->list);
- hlist_del_rcu(&n_ptr->hash);
- kfree_rcu(n_ptr, rcu);
+ list_del_rcu(&node->list);
+ hlist_del_rcu(&node->hash);
+ kfree_rcu(node, rcu);
}
void tipc_node_stop(struct net *net)
spin_lock_bh(&tn->node_list_lock);
list_for_each_entry_safe(node, t_node, &tn->node_list, list)
- tipc_node_delete(tn, node);
+ tipc_node_put(node);
spin_unlock_bh(&tn->node_list_lock);
}
{
struct tipc_node *node;
struct tipc_sock_conn *conn;
+ int err = 0;
if (in_own_node(net, dnode))
return 0;
return -EHOSTUNREACH;
}
conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
- if (!conn)
- return -EHOSTUNREACH;
+ if (!conn) {
+ err = -EHOSTUNREACH;
+ goto exit;
+ }
conn->peer_node = dnode;
conn->port = port;
conn->peer_port = peer_port;
tipc_node_lock(node);
list_add_tail(&conn->list, &node->conn_sks);
tipc_node_unlock(node);
- return 0;
+exit:
+ tipc_node_put(node);
+ return err;
}
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
kfree(conn);
}
tipc_node_unlock(node);
+ tipc_node_put(node);
}
/**
char *linkname, size_t len)
{
struct tipc_link *link;
+ int err = -EINVAL;
struct tipc_node *node = tipc_node_find(net, addr);
- if ((bearer_id >= MAX_BEARERS) || !node)
- return -EINVAL;
+ if (!node)
+ return err;
+
+ if (bearer_id >= MAX_BEARERS)
+ goto exit;
+
tipc_node_lock(node);
link = node->links[bearer_id];
if (link) {
strncpy(linkname, link->name, len);
- tipc_node_unlock(node);
- return 0;
+ err = 0;
}
+exit:
tipc_node_unlock(node);
- return -EINVAL;
+ tipc_node_put(node);
+ return err;
}
void tipc_node_unlock(struct tipc_node *node)
TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
- TIPC_NAMED_MSG_EVT);
+ TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET);
spin_unlock_bh(&node->lock);
if (flags & TIPC_BCAST_MSG_EVT)
tipc_bclink_input(net);
+
+ if (flags & TIPC_BCAST_RESET)
+ tipc_link_reset_all(node);
}
/* Caller should hold node lock for the passed node */
msg.seq = cb->nlh->nlmsg_seq;
rcu_read_lock();
-
- if (last_addr && !tipc_node_find(net, last_addr)) {
- rcu_read_unlock();
- /* We never set seq or call nl_dump_check_consistent() this
- * means that setting prev_seq here will cause the consistence
- * check to fail in the netlink callback handler. Resulting in
- * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if
- * the node state changed while we released the lock.
- */
- cb->prev_seq = 1;
- return -EPIPE;
+ if (last_addr) {
+ node = tipc_node_find(net, last_addr);
+ if (!node) {
+ rcu_read_unlock();
+ /* We never set seq or call nl_dump_check_consistent()
+ * this means that setting prev_seq here will cause the
+ * consistence check to fail in the netlink callback
+ * handler. Resulting in the NLMSG_DONE message having
+ * the NLM_F_DUMP_INTR flag set if the node state
+ * changed while we released the lock.
+ */
+ cb->prev_seq = 1;
+ return -EPIPE;
+ }
+ tipc_node_put(node);
}
list_for_each_entry_rcu(node, &tn->node_list, list) {
TIPC_NOTIFY_LINK_UP = (1 << 6),
TIPC_NOTIFY_LINK_DOWN = (1 << 7),
TIPC_NAMED_MSG_EVT = (1 << 8),
- TIPC_BCAST_MSG_EVT = (1 << 9)
+ TIPC_BCAST_MSG_EVT = (1 << 9),
+ TIPC_BCAST_RESET = (1 << 10)
};
/**
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
+ * @ref: reference counter to node object
* @lock: spinlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
*/
struct tipc_node {
u32 addr;
+ struct kref kref;
spinlock_t lock;
struct net *net;
struct hlist_node hash;
};
struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+void tipc_node_put(struct tipc_node *node);
struct tipc_node *tipc_node_create(struct net *net, u32 addr);
void tipc_node_stop(struct net *net);
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
node = tipc_node_find(net, addr);
- if (likely(node))
+ if (likely(node)) {
mtu = node->act_mtus[selector & 1];
- else
+ tipc_node_put(node);
+ } else {
mtu = MAX_MSG_SIZE;
+ }
return mtu;
}