Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next...

author David S. Miller <davem@davemloft.net>
Fri, 23 Oct 2015 13:58:09 +0000 (06:58 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 23 Oct 2015 13:58:09 +0000 (06:58 -0700)
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c

index c29aebe1e62b59801ca7bb2aed36f9c502733fbf..9093577755f69bac2c548518922b1b63215a62a6 100644 (file)
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -26,7 +26,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg)
         if (bus == NULL)
                 return -EINVAL;
  
-       return mdiobus_read(bus, ds->pd->sw_addr + addr, reg);
+       return mdiobus_read_nested(bus, ds->pd->sw_addr + addr, reg);
  }
  
  #define REG_READ(addr, reg)                                    \
@@ -47,7 +47,7 @@ static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
         if (bus == NULL)
                 return -EINVAL;
  
-       return mdiobus_write(bus, ds->pd->sw_addr + addr, reg, val);
+       return mdiobus_write_nested(bus, ds->pd->sw_addr + addr, reg, val);
  }
  
  #define REG_WRITE(addr, reg, val)                              \
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c

index 7af96309bc07ac7460bfe5041a81800e25f33b3d..b1b14f519d8b195a28e608e49bb94bb751e226e3 100644 (file)
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -24,34 +24,6 @@
  #include <net/switchdev.h>
  #include "mv88e6xxx.h"
  
-/* MDIO bus access can be nested in the case of PHYs connected to the
- * internal MDIO bus of the switch, which is accessed via MDIO bus of
- * the Ethernet interface. Avoid lockdep false positives by using
- * mutex_lock_nested().
- */
-static int mv88e6xxx_mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
-{
-       int ret;
-
-       mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING);
-       ret = bus->read(bus, addr, regnum);
-       mutex_unlock(&bus->mdio_lock);
-
-       return ret;
-}
-
-static int mv88e6xxx_mdiobus_write(struct mii_bus *bus, int addr, u32 regnum,
-                                  u16 val)
-{
-       int ret;
-
-       mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING);
-       ret = bus->write(bus, addr, regnum, val);
-       mutex_unlock(&bus->mdio_lock);
-
-       return ret;
-}
-
  /* If the switch's ADDR[4:0] strap pins are strapped to zero, it will
   * use all 32 SMI bus addresses on its SMI bus, and all switch registers
   * will be directly accessible on some {device address,register address}
@@ -66,7 +38,7 @@ static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr)
         int i;
  
         for (i = 0; i < 16; i++) {
-               ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_CMD);
+               ret = mdiobus_read_nested(bus, sw_addr, SMI_CMD);
                 if (ret < 0)
                         return ret;
  
@@ -82,7 +54,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
         int ret;
  
         if (sw_addr == 0)
-               return mv88e6xxx_mdiobus_read(bus, addr, reg);
+               return mdiobus_read_nested(bus, addr, reg);
  
         /* Wait for the bus to become free. */
         ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
@@ -90,8 +62,8 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
                 return ret;
  
         /* Transmit the read command. */
-       ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD,
-                                     SMI_CMD_OP_22_READ | (addr << 5) | reg);
+       ret = mdiobus_write_nested(bus, sw_addr, SMI_CMD,
+                                  SMI_CMD_OP_22_READ | (addr << 5) | reg);
         if (ret < 0)
                 return ret;
  
@@ -101,7 +73,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
                 return ret;
  
         /* Read the data. */
-       ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_DATA);
+       ret = mdiobus_read_nested(bus, sw_addr, SMI_DATA);
         if (ret < 0)
                 return ret;
  
@@ -145,7 +117,7 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
         int ret;
  
         if (sw_addr == 0)
-               return mv88e6xxx_mdiobus_write(bus, addr, reg, val);
+               return mdiobus_write_nested(bus, addr, reg, val);
  
         /* Wait for the bus to become free. */
         ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
@@ -153,13 +125,13 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
                 return ret;
  
         /* Transmit the data to write. */
-       ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_DATA, val);
+       ret = mdiobus_write_nested(bus, sw_addr, SMI_DATA, val);
         if (ret < 0)
                 return ret;
  
         /* Transmit the write command. */
-       ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD,
-                                     SMI_CMD_OP_22_WRITE | (addr << 5) | reg);
+       ret = mdiobus_write_nested(bus, sw_addr, SMI_CMD,
+                                  SMI_CMD_OP_22_WRITE | (addr << 5) | reg);
         if (ret < 0)
                 return ret;
  
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c

index 3b75adfb3f379e4c89c560d18a2eee39f75f7e65..55d2d8577d075719cc28e61430bbb1414e3750c8 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2583,17 +2583,7 @@ static struct platform_driver cpsw_driver = {
         .remove = cpsw_remove,
  };
  
-static int __init cpsw_init(void)
-{
-       return platform_driver_register(&cpsw_driver);
-}
-late_initcall(cpsw_init);
-
-static void __exit cpsw_exit(void)
-{
-       platform_driver_unregister(&cpsw_driver);
-}
-module_exit(cpsw_exit);
+module_platform_driver(cpsw_driver);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Cyril Chemparathy <cyril@ti.com>");
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c

index 12f44c53cc8ebca7cba92119c26b01b249e31846..88cb4592b6fbbc6cc1ed6812b0fad2d17885a17e 100644 (file)
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -371,6 +371,33 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
  }
  EXPORT_SYMBOL(mdiobus_scan);
  
+/**
+ * mdiobus_read_nested - Nested version of the mdiobus_read function
+ * @bus: the mii_bus struct
+ * @addr: the phy address
+ * @regnum: register number to read
+ *
+ * In case of nested MDIO bus access avoid lockdep false positives by
+ * using mutex_lock_nested().
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum)
+{
+       int retval;
+
+       BUG_ON(in_interrupt());
+
+       mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING);
+       retval = bus->read(bus, addr, regnum);
+       mutex_unlock(&bus->mdio_lock);
+
+       return retval;
+}
+EXPORT_SYMBOL(mdiobus_read_nested);
+
  /**
   * mdiobus_read - Convenience function for reading a given MII mgmt register
   * @bus: the mii_bus struct
@@ -395,6 +422,34 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
  }
  EXPORT_SYMBOL(mdiobus_read);
  
+/**
+ * mdiobus_write_nested - Nested version of the mdiobus_write function
+ * @bus: the mii_bus struct
+ * @addr: the phy address
+ * @regnum: register number to write
+ * @val: value to write to @regnum
+ *
+ * In case of nested MDIO bus access avoid lockdep false positives by
+ * using mutex_lock_nested().
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val)
+{
+       int err;
+
+       BUG_ON(in_interrupt());
+
+       mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING);
+       err = bus->write(bus, addr, regnum, val);
+       mutex_unlock(&bus->mdio_lock);
+
+       return err;
+}
+EXPORT_SYMBOL(mdiobus_write_nested);
+
  /**
   * mdiobus_write - Convenience function for writing a given MII mgmt register
   * @bus: the mii_bus struct
diff --git a/include/linux/phy.h b/include/linux/phy.h

index 4c477e6ece33356530da3d38210df2820d2be04d..05fde31b6dc6dbe2f97356cea09a961dc1cd3af0 100644 (file)
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -213,7 +213,9 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
  void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);
  struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
  int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
+int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum);
  int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
+int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val);
  
  
  #define PHY_INTERRUPT_DISABLED 0x0
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h

index 63615709839d1c958142e0c4ad27512aa570a1da..481fe1c9044cfd8b49585139e24df16b0716debf 100644 (file)
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -43,7 +43,9 @@ struct inet_connection_sock_af_ops {
         int         (*conn_request)(struct sock *sk, struct sk_buff *skb);
         struct sock *(*syn_recv_sock)(const struct sock *sk, struct sk_buff *skb,
                                       struct request_sock *req,
-                                     struct dst_entry *dst);
+                                     struct dst_entry *dst,
+                                     struct request_sock *req_unhash,
+                                     bool *own_req);
         u16         net_header_len;
         u16         net_frag_header_len;
         u16         sockaddr_len;
@@ -272,6 +274,9 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
                               struct sock *child);
  void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                                    unsigned long timeout);
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+                                        struct request_sock *req,
+                                        bool own_req);
  
  static inline void inet_csk_reqsk_queue_added(struct sock *sk)
  {
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h

index 6683ada25fefae509e95d57bfd3dcee2a6845640..de2e3ade61028cc9937861a6218e3f26ff4a1321 100644 (file)
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
  
  void inet_hashinfo_init(struct inet_hashinfo *h);
  
-int inet_ehash_insert(struct sock *sk, struct sock *osk);
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
+bool inet_ehash_insert(struct sock *sk, struct sock *osk);
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
  void __inet_hash(struct sock *sk, struct sock *osk);
  void inet_hash(struct sock *sk);
  void inet_unhash(struct sock *sk);
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h

index 4757997f76edf4f8871a1b2d8407954b5c17abb5..179253f9dcfd986ef806331044bc4973f1cc7d6e 100644 (file)
--- a/include/net/mpls_iptunnel.h
+++ b/include/net/mpls_iptunnel.h
@@ -18,7 +18,7 @@
  
  struct mpls_iptunnel_encap {
         u32     label[MAX_NEW_LABELS];
-       u32     labels;
+       u8      labels;
  };
  
  static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 11e3204122167e55d4a6bcf142f5ac34d5ccb85d..f80e74c5ad18b22c274ecd7e75b6a23ffe7268b4 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -457,7 +457,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
  void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
  struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                   struct request_sock *req,
-                                 struct dst_entry *dst);
+                                 struct dst_entry *dst,
+                                 struct request_sock *req_unhash,
+                                 bool *own_req);
  int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
  int tcp_connect(struct sock *sk);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h

index 923f5a180134ee0b180ca86a389d3f4a59d56b8d..b0e28d24e1a749ce1cfa7204a7cbdd201da8368f 100644 (file)
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -278,7 +278,9 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
  
  struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                        struct request_sock *req,
-                                      struct dst_entry *dst);
+                                      struct dst_entry *dst,
+                                      struct request_sock *req_unhash,
+                                      bool *own_req);
  struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
                             struct request_sock *req);
  
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c

index 59bc180b02d8cb1dcc2e7c2681e2c86864bc4e63..5684e14932bd47e97b9d547307bfc50230e7be7d 100644 (file)
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -393,7 +393,9 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
  struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
                                        struct sk_buff *skb,
                                        struct request_sock *req,
-                                      struct dst_entry *dst)
+                                      struct dst_entry *dst,
+                                      struct request_sock *req_unhash,
+                                      bool *own_req)
  {
         struct inet_request_sock *ireq;
         struct inet_sock *newinet;
@@ -426,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
  
         if (__inet_inherit_port(sk, newsk) < 0)
                 goto put_and_exit;
-       __inet_hash_nolisten(newsk, NULL);
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
  
         return newsk;
  
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c

index d9cc731f261974c80b405f461c2c4224154caa8f..ef4e48ce9143073872adc2c2816f3b0eb3665a4b 100644 (file)
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -380,7 +380,9 @@ drop:
  static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
                                               struct sk_buff *skb,
                                               struct request_sock *req,
-                                             struct dst_entry *dst)
+                                             struct dst_entry *dst,
+                                             struct request_sock *req_unhash,
+                                             bool *own_req)
  {
         struct inet_request_sock *ireq = inet_rsk(req);
         struct ipv6_pinfo *newnp;
@@ -393,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
                 /*
                  *      v6 mapped
                  */
-               newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
+               newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
+                                                 req_unhash, own_req);
                 if (newsk == NULL)
                         return NULL;
  
@@ -511,7 +514,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
                 dccp_done(newsk);
                 goto out;
         }
-       __inet_hash(newsk, NULL);
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
  
         return newsk;
  
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c

index d10aace43672a962080c68b20c2fa9ba36d8ac83..1994f8af646b15fe668c01b207567f9016865c4f 100644 (file)
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
  {
         struct sock *child = NULL;
         struct dccp_request_sock *dreq = dccp_rsk(req);
+       bool own_req;
  
         /* Check for retransmitted REQUEST */
         if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
         if (dccp_parse_options(sk, dreq, skb))
                  goto drop;
  
-       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-       if (child == NULL)
+       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+                                                        req, &own_req);
+       if (!child)
                 goto listen_overflow;
  
-       inet_csk_reqsk_queue_drop(sk, req);
-       inet_csk_reqsk_queue_add(sk, req, child);
-out:
-       return child;
+       return inet_csk_complete_hashdance(sk, child, req, own_req);
+
  listen_overflow:
         dccp_pr_debug("listen_overflow!\n");
         DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -198,7 +198,7 @@ drop:
                 req->rsk_ops->send_reset(sk, skb);
  
         inet_csk_reqsk_queue_drop(sk, req);
-       goto out;
+       return NULL;
  }
  
  EXPORT_SYMBOL_GPL(dccp_check_req);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c

index 8430bc8ccd58c5f666cb0e5d0abeb2ca21e0156e..1feb15f23de8c4f673fd0fe713df2dd9195995cf 100644 (file)
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -523,15 +523,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
                                struct request_sock *req)
  {
         struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
-       spinlock_t *lock;
-       bool found;
+       bool found = false;
  
-       lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
-
-       spin_lock(lock);
-       found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
-       spin_unlock(lock);
+       if (sk_hashed(req_to_sk(req))) {
+               spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
  
+               spin_lock(lock);
+               found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+               spin_unlock(lock);
+       }
         if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
                 reqsk_put(req);
         return found;
@@ -811,6 +811,25 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
  }
  EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
  
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+                                        struct request_sock *req, bool own_req)
+{
+       if (own_req) {
+               inet_csk_reqsk_queue_drop(sk, req);
+               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+               inet_csk_reqsk_queue_add(sk, req, child);
+               /* Warning: caller must not call reqsk_put(req);
+                * child stole last reference on it.
+                */
+               return child;
+       }
+       /* Too bad, another child took ownership of the request, undo. */
+       bh_unlock_sock(child);
+       sock_put(child);
+       return NULL;
+}
+EXPORT_SYMBOL(inet_csk_complete_hashdance);
+
  /*
   *     This routine closes sockets which have been at least partially
   *     opened, but not yet accepted.
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c

index 958728a22001bb514cf47b5a421690062131bcba..ccc5980797fcdb9ed3a1003db47e4e8cb7180279 100644 (file)
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
  /* insert a socket into ehash, and eventually remove another one
   * (The another one can be a SYN_RECV or TIMEWAIT
   */
-int inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
  {
         struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
         struct hlist_nulls_head *list;
         struct inet_ehash_bucket *head;
         spinlock_t *lock;
-       int ret = 0;
+       bool ret = true;
  
         WARN_ON_ONCE(!sk_unhashed(sk));
  
@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
         lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
  
         spin_lock(lock);
-       __sk_nulls_add_node_rcu(sk, list);
         if (osk) {
-               WARN_ON(sk->sk_hash != osk->sk_hash);
-               sk_nulls_del_node_init_rcu(osk);
+               WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+               ret = sk_nulls_del_node_init_rcu(osk);
         }
+       if (ret)
+               __sk_nulls_add_node_rcu(sk, list);
         spin_unlock(lock);
         return ret;
  }
  
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
  {
-       inet_ehash_insert(sk, osk);
-       sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+       bool ok = inet_ehash_insert(sk, osk);
+
+       if (ok) {
+               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+       } else {
+               percpu_counter_inc(sk->sk_prot->orphan_count);
+               sk->sk_state = TCP_CLOSE;
+               sock_set_flag(sk, SOCK_DEAD);
+               inet_csk_destroy_sock(sk);
+       }
+       return ok;
  }
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
  
  void __inet_hash(struct sock *sk, struct sock *osk)
  {
         struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
         struct inet_listen_hashbucket *ilb;
  
-       if (sk->sk_state != TCP_LISTEN)
-               return __inet_hash_nolisten(sk, osk);
-
+       if (sk->sk_state != TCP_LISTEN) {
+               inet_ehash_nolisten(sk, osk);
+               return;
+       }
         WARN_ON(!sk_unhashed(sk));
         ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
  
@@ -567,7 +578,7 @@ ok:
                 inet_bind_hash(sk, tb, port);
                 if (sk_unhashed(sk)) {
                         inet_sk(sk)->inet_sport = htons(port);
-                       __inet_hash_nolisten(sk, (struct sock *)tw);
+                       inet_ehash_nolisten(sk, (struct sock *)tw);
                 }
                 if (tw)
                         inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ ok:
         tb  = inet_csk(sk)->icsk_bind_hash;
         spin_lock_bh(&head->lock);
         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-               __inet_hash_nolisten(sk, NULL);
+               inet_ehash_nolisten(sk, NULL);
                 spin_unlock_bh(&head->lock);
                 return 0;
         } else {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c

index 4c0892badb8b1eb47881b8c24976a872f3c61c6c..4cbe9f0a428179d8c35fa5f0a05dd2b445498c11 100644 (file)
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -221,8 +221,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct sock *child;
+       bool own_req;
  
-       child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+       child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
+                                                NULL, &own_req);
         if (child) {
                 atomic_set(&req->rsk_refcnt, 1);
                 sock_rps_save_rxhash(child, skb);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c

index 93396bf7b475f3972d247d282faa406d962696dd..55be6ac70cff3679cd7a80aa9aaac48ac156a203 100644 (file)
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -133,12 +133,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
         struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
         struct sock *child;
         u32 end_seq;
+       bool own_req;
  
         req->num_retrans = 0;
         req->num_timeout = 0;
         req->sk = NULL;
  
-       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+                                                        NULL, &own_req);
         if (!child)
                 return NULL;
  
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 30dd45c1f568a0e986b89752a9f0839ea5f8d5bb..1c2648bbac4b22b55739dde4d92dd2ca0533f77a 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1247,7 +1247,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
   */
  struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                   struct request_sock *req,
-                                 struct dst_entry *dst)
+                                 struct dst_entry *dst,
+                                 struct request_sock *req_unhash,
+                                 bool *own_req)
  {
         struct inet_request_sock *ireq;
         struct inet_sock *newinet;
@@ -1323,7 +1325,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
  
         if (__inet_inherit_port(sk, newsk) < 0)
                 goto put_and_exit;
-       __inet_hash_nolisten(newsk, NULL);
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
  
         return newsk;
  
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index 1fd5d413a6642b526c98edc0144b3ceed503bb9d..3575dd1e5b6775ad8a35bb3ce0e951bc01e37e7c 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -580,6 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
         const struct tcphdr *th = tcp_hdr(skb);
         __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
         bool paws_reject = false;
+       bool own_req;
  
         tmp_opt.saw_tstamp = 0;
         if (th->doff > (sizeof(struct tcphdr)>>2)) {
@@ -767,18 +768,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
          * ESTABLISHED STATE. If it will be dropped after
          * socket is created, wait for troubles.
          */
-       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+                                                        req, &own_req);
         if (!child)
                 goto listen_overflow;
  
         sock_rps_save_rxhash(child, skb);
         tcp_synack_rtt_meas(child, req);
-       inet_csk_reqsk_queue_drop(sk, req);
-       inet_csk_reqsk_queue_add(sk, req, child);
-       /* Warning: caller must not call reqsk_put(req);
-        * child stole last reference on it.
-        */
-       return child;
+       return inet_csk_complete_hashdance(sk, child, req, own_req);
  
  listen_overflow:
         if (!sysctl_tcp_abort_on_overflow) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index f495d189f5e022263bca20d01b10afdb0be06059..714bc5ad096e9beda5226c4caf8ddd4efae038bf 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -965,7 +965,9 @@ drop:
  
  static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                          struct request_sock *req,
-                                        struct dst_entry *dst)
+                                        struct dst_entry *dst,
+                                        struct request_sock *req_unhash,
+                                        bool *own_req)
  {
         struct inet_request_sock *ireq;
         struct ipv6_pinfo *newnp;
@@ -984,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
                  *      v6 mapped
                  */
  
-               newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+               newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
+                                            req_unhash, own_req);
  
                 if (!newsk)
                         return NULL;
@@ -1145,7 +1148,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
                 tcp_done(newsk);
                 goto out;
         }
-       __inet_hash(newsk, NULL);
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
  
         return newsk;
  
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c

index bb185a28de9890d2f4b3c57d1ca7af7600f9b2aa..cc972e30355b600769051dc3841ba7fa582b42a5 100644 (file)
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -19,36 +19,13 @@
  #include <net/ipv6.h>
  #include <net/addrconf.h>
  #endif
+#include <net/nexthop.h>
  #include "internal.h"
  
-#define LABEL_NOT_SPECIFIED (1<<20)
-#define MAX_NEW_LABELS 2
-
-/* This maximum ha length copied from the definition of struct neighbour */
-#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
-
-enum mpls_payload_type {
-       MPT_UNSPEC, /* IPv4 or IPv6 */
-       MPT_IPV4 = 4,
-       MPT_IPV6 = 6,
-
-       /* Other types not implemented:
-        *  - Pseudo-wire with or without control word (RFC4385)
-        *  - GAL (RFC5586)
-        */
-};
-
-struct mpls_route { /* next hop label forwarding entry */
-       struct net_device __rcu *rt_dev;
-       struct rcu_head         rt_rcu;
-       u32                     rt_label[MAX_NEW_LABELS];
-       u8                      rt_protocol; /* routing protocol that set this entry */
-       u8                      rt_payload_type;
-       u8                      rt_labels;
-       u8                      rt_via_alen;
-       u8                      rt_via_table;
-       u8                      rt_via[0];
-};
+/* Maximum number of labels to look ahead at when selecting a path of
+ * a multipath route
+ */
+#define MAX_MP_SELECT_LABELS 4
  
  static int zero = 0;
  static int label_limit = (1 << 20) - 1;
@@ -80,10 +57,10 @@ bool mpls_output_possible(const struct net_device *dev)
  }
  EXPORT_SYMBOL_GPL(mpls_output_possible);
  
-static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
+static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
  {
         /* The size of the layer 2.5 labels to be added for this route */
-       return rt->rt_labels * sizeof(struct mpls_shim_hdr);
+       return nh->nh_labels * sizeof(struct mpls_shim_hdr);
  }
  
  unsigned int mpls_dev_mtu(const struct net_device *dev)
@@ -105,6 +82,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
  }
  EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
  
+static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
+                                            struct sk_buff *skb, bool bos)
+{
+       struct mpls_entry_decoded dec;
+       struct mpls_shim_hdr *hdr;
+       bool eli_seen = false;
+       int label_index;
+       int nh_index = 0;
+       u32 hash = 0;
+
+       /* No need to look further into packet if there's only
+        * one path
+        */
+       if (rt->rt_nhn == 1)
+               goto out;
+
+       for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
+            label_index++) {
+               if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
+                       break;
+
+               /* Read and decode the current label */
+               hdr = mpls_hdr(skb) + label_index;
+               dec = mpls_entry_decode(hdr);
+
+               /* RFC6790 - reserved labels MUST NOT be used as keys
+                * for the load-balancing function
+                */
+               if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
+                       hash = jhash_1word(dec.label, hash);
+
+                       /* The entropy label follows the entropy label
+                        * indicator, so this means that the entropy
+                        * label was just added to the hash - no need to
+                        * go any deeper either in the label stack or in the
+                        * payload
+                        */
+                       if (eli_seen)
+                               break;
+               } else if (dec.label == MPLS_LABEL_ENTROPY) {
+                       eli_seen = true;
+               }
+
+               bos = dec.bos;
+               if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
+                                        sizeof(struct iphdr))) {
+                       const struct iphdr *v4hdr;
+
+                       v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
+                                                      label_index);
+                       if (v4hdr->version == 4) {
+                               hash = jhash_3words(ntohl(v4hdr->saddr),
+                                                   ntohl(v4hdr->daddr),
+                                                   v4hdr->protocol, hash);
+                       } else if (v4hdr->version == 6 &&
+                               pskb_may_pull(skb, sizeof(*hdr) * label_index +
+                                             sizeof(struct ipv6hdr))) {
+                               const struct ipv6hdr *v6hdr;
+
+                               v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
+                                                               label_index);
+
+                               hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
+                               hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
+                               hash = jhash_1word(v6hdr->nexthdr, hash);
+                       }
+               }
+       }
+
+       nh_index = hash % rt->rt_nhn;
+out:
+       return &rt->rt_nh[nh_index];
+}
+
  static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
                         struct mpls_entry_decoded dec)
  {
@@ -159,6 +210,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
         struct net *net = dev_net(dev);
         struct mpls_shim_hdr *hdr;
         struct mpls_route *rt;
+       struct mpls_nh *nh;
         struct mpls_entry_decoded dec;
         struct net_device *out_dev;
         struct mpls_dev *mdev;
@@ -196,8 +248,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
         if (!rt)
                 goto drop;
  
+       nh = mpls_select_multipath(rt, skb, dec.bos);
+       if (!nh)
+               goto drop;
+
         /* Find the output device */
-       out_dev = rcu_dereference(rt->rt_dev);
+       out_dev = rcu_dereference(nh->nh_dev);
         if (!mpls_output_possible(out_dev))
                 goto drop;
  
@@ -212,7 +268,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
         dec.ttl -= 1;
  
         /* Verify the destination can hold the packet */
-       new_header_size = mpls_rt_header_size(rt);
+       new_header_size = mpls_nh_header_size(nh);
         mtu = mpls_dev_mtu(out_dev);
         if (mpls_pkt_too_big(skb, mtu - new_header_size))
                 goto drop;
@@ -240,13 +296,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
                 /* Push the new labels */
                 hdr = mpls_hdr(skb);
                 bos = dec.bos;
-               for (i = rt->rt_labels - 1; i >= 0; i--) {
-                       hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
+               for (i = nh->nh_labels - 1; i >= 0; i--) {
+                       hdr[i] = mpls_entry_encode(nh->nh_label[i],
+                                                  dec.ttl, 0, bos);
                         bos = false;
                 }
         }
  
-       err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
+       err = neigh_xmit(nh->nh_via_table, out_dev, nh->nh_via, skb);
         if (err)
                 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
                                     __func__, err);
@@ -270,24 +327,28 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
  struct mpls_route_config {
         u32                     rc_protocol;
         u32                     rc_ifindex;
-       u16                     rc_via_table;
-       u16                     rc_via_alen;
+       u8                      rc_via_table;
+       u8                      rc_via_alen;
         u8                      rc_via[MAX_VIA_ALEN];
         u32                     rc_label;
-       u32                     rc_output_labels;
+       u8                      rc_output_labels;
         u32                     rc_output_label[MAX_NEW_LABELS];
         u32                     rc_nlflags;
         enum mpls_payload_type  rc_payload_type;
         struct nl_info          rc_nlinfo;
+       struct rtnexthop        *rc_mp;
+       int                     rc_mp_len;
  };
  
-static struct mpls_route *mpls_rt_alloc(size_t alen)
+static struct mpls_route *mpls_rt_alloc(int num_nh)
  {
         struct mpls_route *rt;
  
-       rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
+       rt = kzalloc(sizeof(*rt) + (num_nh * sizeof(struct mpls_nh)),
+                    GFP_KERNEL);
         if (rt)
-               rt->rt_via_alen = alen;
+               rt->rt_nhn = num_nh;
+
         return rt;
  }
  
@@ -312,25 +373,22 @@ static void mpls_notify_route(struct net *net, unsigned index,
  }
  
  static void mpls_route_update(struct net *net, unsigned index,
-                             struct net_device *dev, struct mpls_route *new,
+                             struct mpls_route *new,
                               const struct nl_info *info)
  {
         struct mpls_route __rcu **platform_label;
-       struct mpls_route *rt, *old = NULL;
+       struct mpls_route *rt;
  
         ASSERT_RTNL();
  
         platform_label = rtnl_dereference(net->mpls.platform_label);
         rt = rtnl_dereference(platform_label[index]);
-       if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
-               rcu_assign_pointer(platform_label[index], new);
-               old = rt;
-       }
+       rcu_assign_pointer(platform_label[index], new);
  
-       mpls_notify_route(net, index, old, new, info);
+       mpls_notify_route(net, index, rt, new, info);
  
         /* If we removed a route free it now */
-       mpls_rt_free(old);
+       mpls_rt_free(rt);
  }
  
  static unsigned find_free_label(struct net *net)
@@ -406,40 +464,199 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
  #endif
  
  static struct net_device *find_outdev(struct net *net,
-                                     struct mpls_route_config *cfg)
+                                     struct mpls_nh *nh, int oif)
  {
         struct net_device *dev = NULL;
  
-       if (!cfg->rc_ifindex) {
-               switch (cfg->rc_via_table) {
+       if (!oif) {
+               switch (nh->nh_via_table) {
                 case NEIGH_ARP_TABLE:
-                       dev = inet_fib_lookup_dev(net, cfg->rc_via);
+                       dev = inet_fib_lookup_dev(net, nh->nh_via);
                         break;
                 case NEIGH_ND_TABLE:
-                       dev = inet6_fib_lookup_dev(net, cfg->rc_via);
+                       dev = inet6_fib_lookup_dev(net, nh->nh_via);
                         break;
                 case NEIGH_LINK_TABLE:
                         break;
                 }
         } else {
-               dev = dev_get_by_index(net, cfg->rc_ifindex);
+               dev = dev_get_by_index(net, oif);
         }
  
         if (!dev)
                 return ERR_PTR(-ENODEV);
  
+       /* The caller is holding rtnl anyways, so release the dev reference */
+       dev_put(dev);
+
         return dev;
  }
  
+static int mpls_nh_assign_dev(struct net *net, struct mpls_nh *nh, int oif)
+{
+       struct net_device *dev = NULL;
+       int err = -ENODEV;
+
+       dev = find_outdev(net, nh, oif);
+       if (IS_ERR(dev)) {
+               err = PTR_ERR(dev);
+               dev = NULL;
+               goto errout;
+       }
+
+       /* Ensure this is a supported device */
+       err = -EINVAL;
+       if (!mpls_dev_get(dev))
+               goto errout;
+
+       RCU_INIT_POINTER(nh->nh_dev, dev);
+
+       return 0;
+
+errout:
+       return err;
+}
+
+static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
+                                 struct mpls_route *rt)
+{
+       struct net *net = cfg->rc_nlinfo.nl_net;
+       struct mpls_nh *nh = rt->rt_nh;
+       int err;
+       int i;
+
+       if (!nh)
+               return -ENOMEM;
+
+       err = -EINVAL;
+       /* Ensure only a supported number of labels are present */
+       if (cfg->rc_output_labels > MAX_NEW_LABELS)
+               goto errout;
+
+       nh->nh_labels = cfg->rc_output_labels;
+       for (i = 0; i < nh->nh_labels; i++)
+               nh->nh_label[i] = cfg->rc_output_label[i];
+
+       nh->nh_via_table = cfg->rc_via_table;
+       memcpy(nh->nh_via, cfg->rc_via, cfg->rc_via_alen);
+       nh->nh_via_alen = cfg->rc_via_alen;
+
+       err = mpls_nh_assign_dev(net, nh, cfg->rc_ifindex);
+       if (err)
+               goto errout;
+
+       return 0;
+
+errout:
+       return err;
+}
+
+static int mpls_nh_build(struct net *net, struct mpls_nh *nh,
+                        int oif, struct nlattr *via, struct nlattr *newdst)
+{
+       int err = -ENOMEM;
+
+       if (!nh)
+               goto errout;
+
+       if (newdst) {
+               err = nla_get_labels(newdst, MAX_NEW_LABELS,
+                                    &nh->nh_labels, nh->nh_label);
+               if (err)
+                       goto errout;
+       }
+
+       err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
+                         nh->nh_via);
+       if (err)
+               goto errout;
+
+       err = mpls_nh_assign_dev(net, nh, oif);
+       if (err)
+               goto errout;
+
+       return 0;
+
+errout:
+       return err;
+}
+
+static int mpls_count_nexthops(struct rtnexthop *rtnh, int len)
+{
+       int nhs = 0;
+       int remaining = len;
+
+       while (rtnh_ok(rtnh, remaining)) {
+               nhs++;
+               rtnh = rtnh_next(rtnh, &remaining);
+       }
+
+       /* leftover implies invalid nexthop configuration, discard it */
+       return remaining > 0 ? 0 : nhs;
+}
+
+static int mpls_nh_build_multi(struct mpls_route_config *cfg,
+                              struct mpls_route *rt)
+{
+       struct rtnexthop *rtnh = cfg->rc_mp;
+       struct nlattr *nla_via, *nla_newdst;
+       int remaining = cfg->rc_mp_len;
+       int nhs = 0;
+       int err = 0;
+
+       change_nexthops(rt) {
+               int attrlen;
+
+               nla_via = NULL;
+               nla_newdst = NULL;
+
+               err = -EINVAL;
+               if (!rtnh_ok(rtnh, remaining))
+                       goto errout;
+
+               /* neither weighted multipath nor any flags
+                * are supported
+                */
+               if (rtnh->rtnh_hops || rtnh->rtnh_flags)
+                       goto errout;
+
+               attrlen = rtnh_attrlen(rtnh);
+               if (attrlen > 0) {
+                       struct nlattr *attrs = rtnh_attrs(rtnh);
+
+                       nla_via = nla_find(attrs, attrlen, RTA_VIA);
+                       nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
+               }
+
+               if (!nla_via)
+                       goto errout;
+
+               err = mpls_nh_build(cfg->rc_nlinfo.nl_net, nh,
+                                   rtnh->rtnh_ifindex, nla_via,
+                                   nla_newdst);
+               if (err)
+                       goto errout;
+
+               rtnh = rtnh_next(rtnh, &remaining);
+               nhs++;
+       } endfor_nexthops(rt);
+
+       rt->rt_nhn = nhs;
+
+       return 0;
+
+errout:
+       return err;
+}
+
  static int mpls_route_add(struct mpls_route_config *cfg)
  {
         struct mpls_route __rcu **platform_label;
         struct net *net = cfg->rc_nlinfo.nl_net;
-       struct net_device *dev = NULL;
         struct mpls_route *rt, *old;
-       unsigned index;
-       int i;
         int err = -EINVAL;
+       unsigned index;
+       int nhs = 1; /* default to one nexthop */
  
         index = cfg->rc_label;
  
@@ -457,27 +674,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
         if (index >= net->mpls.platform_labels)
                 goto errout;
  
-       /* Ensure only a supported number of labels are present */
-       if (cfg->rc_output_labels > MAX_NEW_LABELS)
-               goto errout;
-
-       dev = find_outdev(net, cfg);
-       if (IS_ERR(dev)) {
-               err = PTR_ERR(dev);
-               dev = NULL;
-               goto errout;
-       }
-
-       /* Ensure this is a supported device */
-       err = -EINVAL;
-       if (!mpls_dev_get(dev))
-               goto errout;
-
-       err = -EINVAL;
-       if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
-           (dev->addr_len != cfg->rc_via_alen))
-               goto errout;
-
         /* Append makes no sense with mpls */
         err = -EOPNOTSUPP;
         if (cfg->rc_nlflags & NLM_F_APPEND)
@@ -497,28 +693,35 @@ static int mpls_route_add(struct mpls_route_config *cfg)
         if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
                 goto errout;
  
+       if (cfg->rc_mp) {
+               err = -EINVAL;
+               nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len);
+               if (nhs == 0)
+                       goto errout;
+       }
+
         err = -ENOMEM;
-       rt = mpls_rt_alloc(cfg->rc_via_alen);
+       rt = mpls_rt_alloc(nhs);
         if (!rt)
                 goto errout;
  
-       rt->rt_labels = cfg->rc_output_labels;
-       for (i = 0; i < rt->rt_labels; i++)
-               rt->rt_label[i] = cfg->rc_output_label[i];
         rt->rt_protocol = cfg->rc_protocol;
-       RCU_INIT_POINTER(rt->rt_dev, dev);
         rt->rt_payload_type = cfg->rc_payload_type;
-       rt->rt_via_table = cfg->rc_via_table;
-       memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
  
-       mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
+       if (cfg->rc_mp)
+               err = mpls_nh_build_multi(cfg, rt);
+       else
+               err = mpls_nh_build_from_cfg(cfg, rt);
+       if (err)
+               goto freert;
+
+       mpls_route_update(net, index, rt, &cfg->rc_nlinfo);
  
-       dev_put(dev);
         return 0;
  
+freert:
+       mpls_rt_free(rt);
  errout:
-       if (dev)
-               dev_put(dev);
         return err;
  }
  
@@ -538,7 +741,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
         if (index >= net->mpls.platform_labels)
                 goto errout;
  
-       mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo);
+       mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
  
         err = 0;
  errout:
@@ -635,9 +838,11 @@ static void mpls_ifdown(struct net_device *dev)
                 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
                 if (!rt)
                         continue;
-               if (rtnl_dereference(rt->rt_dev) != dev)
-                       continue;
-               rt->rt_dev = NULL;
+               for_nexthops(rt) {
+                       if (rtnl_dereference(nh->nh_dev) != dev)
+                               continue;
+                       nh->nh_dev = NULL;
+               } endfor_nexthops(rt);
         }
  
         mdev = mpls_dev_get(dev);
@@ -736,7 +941,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
  EXPORT_SYMBOL_GPL(nla_put_labels);
  
  int nla_get_labels(const struct nlattr *nla,
-                  u32 max_labels, u32 *labels, u32 label[])
+                  u32 max_labels, u8 *labels, u32 label[])
  {
         unsigned len = nla_len(nla);
         unsigned nla_labels;
@@ -781,6 +986,48 @@ int nla_get_labels(const struct nlattr *nla,
  }
  EXPORT_SYMBOL_GPL(nla_get_labels);
  
+int nla_get_via(const struct nlattr *nla, u8 *via_alen,
+               u8 *via_table, u8 via_addr[])
+{
+       struct rtvia *via = nla_data(nla);
+       int err = -EINVAL;
+       int alen;
+
+       if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
+               goto errout;
+       alen = nla_len(nla) -
+                       offsetof(struct rtvia, rtvia_addr);
+       if (alen > MAX_VIA_ALEN)
+               goto errout;
+
+       /* Validate the address family */
+       switch (via->rtvia_family) {
+       case AF_PACKET:
+               *via_table = NEIGH_LINK_TABLE;
+               break;
+       case AF_INET:
+               *via_table = NEIGH_ARP_TABLE;
+               if (alen != 4)
+                       goto errout;
+               break;
+       case AF_INET6:
+               *via_table = NEIGH_ND_TABLE;
+               if (alen != 16)
+                       goto errout;
+               break;
+       default:
+               /* Unsupported address family */
+               goto errout;
+       }
+
+       memcpy(via_addr, via->rtvia_addr, alen);
+       *via_alen = alen;
+       err = 0;
+
+errout:
+       return err;
+}
+
  static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
                                struct mpls_route_config *cfg)
  {
@@ -844,7 +1091,7 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
                         break;
                 case RTA_DST:
                 {
-                       u32 label_count;
+                       u8 label_count;
                         if (nla_get_labels(nla, 1, &label_count,
                                            &cfg->rc_label))
                                 goto errout;
@@ -857,35 +1104,15 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
                 }
                 case RTA_VIA:
                 {
-                       struct rtvia *via = nla_data(nla);
-                       if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
+                       if (nla_get_via(nla, &cfg->rc_via_alen,
+                                       &cfg->rc_via_table, cfg->rc_via))
                                 goto errout;
-                       cfg->rc_via_alen   = nla_len(nla) -
-                               offsetof(struct rtvia, rtvia_addr);
-                       if (cfg->rc_via_alen > MAX_VIA_ALEN)
-                               goto errout;
-
-                       /* Validate the address family */
-                       switch(via->rtvia_family) {
-                       case AF_PACKET:
-                               cfg->rc_via_table = NEIGH_LINK_TABLE;
-                               break;
-                       case AF_INET:
-                               cfg->rc_via_table = NEIGH_ARP_TABLE;
-                               if (cfg->rc_via_alen != 4)
-                                       goto errout;
-                               break;
-                       case AF_INET6:
-                               cfg->rc_via_table = NEIGH_ND_TABLE;
-                               if (cfg->rc_via_alen != 16)
-                                       goto errout;
-                               break;
-                       default:
-                               /* Unsupported address family */
-                               goto errout;
-                       }
-
-                       memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
+                       break;
+               }
+               case RTA_MULTIPATH:
+               {
+                       cfg->rc_mp = nla_data(nla);
+                       cfg->rc_mp_len = nla_len(nla);
                         break;
                 }
                 default:
@@ -946,16 +1173,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
         rtm->rtm_type = RTN_UNICAST;
         rtm->rtm_flags = 0;
  
-       if (rt->rt_labels &&
-           nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
-               goto nla_put_failure;
-       if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
-               goto nla_put_failure;
-       dev = rtnl_dereference(rt->rt_dev);
-       if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
-               goto nla_put_failure;
         if (nla_put_labels(skb, RTA_DST, 1, &label))
                 goto nla_put_failure;
+       if (rt->rt_nhn == 1) {
+               struct mpls_nh *nh = rt->rt_nh;
+
+               if (nh->nh_labels &&
+                   nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
+                                  nh->nh_label))
+                       goto nla_put_failure;
+               if (nla_put_via(skb, nh->nh_via_table, nh->nh_via,
+                               nh->nh_via_alen))
+                       goto nla_put_failure;
+               dev = rtnl_dereference(nh->nh_dev);
+               if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
+                       goto nla_put_failure;
+       } else {
+               struct rtnexthop *rtnh;
+               struct nlattr *mp;
+
+               mp = nla_nest_start(skb, RTA_MULTIPATH);
+               if (!mp)
+                       goto nla_put_failure;
+
+               for_nexthops(rt) {
+                       rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+                       if (!rtnh)
+                               goto nla_put_failure;
+
+                       dev = rtnl_dereference(nh->nh_dev);
+                       if (dev)
+                               rtnh->rtnh_ifindex = dev->ifindex;
+                       if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
+                                                           nh->nh_labels,
+                                                           nh->nh_label))
+                               goto nla_put_failure;
+                       if (nla_put_via(skb, nh->nh_via_table,
+                                       nh->nh_via,
+                                       nh->nh_via_alen))
+                               goto nla_put_failure;
+
+                       /* length of rtnetlink header + attributes */
+                       rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
+               } endfor_nexthops(rt);
+
+               nla_nest_end(skb, mp);
+       }
  
         nlmsg_end(skb, nlh);
         return 0;
@@ -1000,12 +1263,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
  {
         size_t payload =
                 NLMSG_ALIGN(sizeof(struct rtmsg))
-               + nla_total_size(2 + rt->rt_via_alen)   /* RTA_VIA */
                 + nla_total_size(4);                    /* RTA_DST */
-       if (rt->rt_labels)                              /* RTA_NEWDST */
-               payload += nla_total_size(rt->rt_labels * 4);
-       if (rt->rt_dev)                                 /* RTA_OIF */
-               payload += nla_total_size(4);
+
+       if (rt->rt_nhn == 1) {
+               struct mpls_nh *nh = rt->rt_nh;
+
+               if (nh->nh_dev)
+                       payload += nla_total_size(4); /* RTA_OIF */
+               payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */
+               if (nh->nh_labels) /* RTA_NEWDST */
+                       payload += nla_total_size(nh->nh_labels * 4);
+       } else {
+               /* each nexthop is packed in an attribute */
+               size_t nhsize = 0;
+
+               for_nexthops(rt) {
+                       nhsize += nla_total_size(sizeof(struct rtnexthop));
+                       nhsize += nla_total_size(2 + nh->nh_via_alen);
+                       if (nh->nh_labels)
+                               nhsize += nla_total_size(nh->nh_labels * 4);
+               } endfor_nexthops(rt);
+               /* nested attribute */
+               payload += nla_total_size(nhsize);
+       }
+
         return payload;
  }
  
@@ -1057,25 +1338,25 @@ static int resize_platform_label_table(struct net *net, size_t limit)
         /* In case the predefined labels need to be populated */
         if (limit > MPLS_LABEL_IPV4NULL) {
                 struct net_device *lo = net->loopback_dev;
-               rt0 = mpls_rt_alloc(lo->addr_len);
+               rt0 = mpls_rt_alloc(1);
                 if (!rt0)
                         goto nort0;
-               RCU_INIT_POINTER(rt0->rt_dev, lo);
+               RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
                 rt0->rt_protocol = RTPROT_KERNEL;
                 rt0->rt_payload_type = MPT_IPV4;
-               rt0->rt_via_table = NEIGH_LINK_TABLE;
-               memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
+               rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
+               memcpy(rt0->rt_nh->nh_via, lo->dev_addr, lo->addr_len);
         }
         if (limit > MPLS_LABEL_IPV6NULL) {
                 struct net_device *lo = net->loopback_dev;
-               rt2 = mpls_rt_alloc(lo->addr_len);
+               rt2 = mpls_rt_alloc(1);
                 if (!rt2)
                         goto nort2;
-               RCU_INIT_POINTER(rt2->rt_dev, lo);
+               RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
                 rt2->rt_protocol = RTPROT_KERNEL;
                 rt2->rt_payload_type = MPT_IPV6;
-               rt2->rt_via_table = NEIGH_LINK_TABLE;
-               memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
+               rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
+               memcpy(rt2->rt_nh->nh_via, lo->dev_addr, lo->addr_len);
         }
  
         rtnl_lock();
@@ -1085,7 +1366,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
  
         /* Free any labels beyond the new table */
         for (index = limit; index < old_limit; index++)
-               mpls_route_update(net, index, NULL, NULL, NULL);
+               mpls_route_update(net, index, NULL, NULL);
  
         /* Copy over the old labels */
         cp_size = size;
diff --git a/net/mpls/internal.h b/net/mpls/internal.h

index 2681a4ba6c375f3faf83498150350ddea7392ccc..d7757be39877e84530ebecfc18fbb7dab502f979 100644 (file)
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -21,6 +21,54 @@ struct mpls_dev {
  
  struct sk_buff;
  
+#define LABEL_NOT_SPECIFIED (1 << 20)
+#define MAX_NEW_LABELS 2
+
+/* This maximum ha length copied from the definition of struct neighbour */
+#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+
+enum mpls_payload_type {
+       MPT_UNSPEC, /* IPv4 or IPv6 */
+       MPT_IPV4 = 4,
+       MPT_IPV6 = 6,
+
+       /* Other types not implemented:
+        *  - Pseudo-wire with or without control word (RFC4385)
+        *  - GAL (RFC5586)
+        */
+};
+
+struct mpls_nh { /* next hop label forwarding entry */
+       struct net_device __rcu *nh_dev;
+       u32                     nh_label[MAX_NEW_LABELS];
+       u8                      nh_labels;
+       u8                      nh_via_alen;
+       u8                      nh_via_table;
+       u8                      nh_via[MAX_VIA_ALEN];
+};
+
+struct mpls_route { /* next hop label forwarding entry */
+       struct rcu_head         rt_rcu;
+       u8                      rt_protocol;
+       u8                      rt_payload_type;
+       int                     rt_nhn;
+       struct mpls_nh          rt_nh[0];
+};
+
+#define for_nexthops(rt) {                                             \
+       int nhsel; struct mpls_nh *nh;                  \
+       for (nhsel = 0, nh = (rt)->rt_nh;                               \
+            nhsel < (rt)->rt_nhn;                                      \
+            nh++, nhsel++)
+
+#define change_nexthops(rt) {                                          \
+       int nhsel; struct mpls_nh *nh;                          \
+       for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh);   \
+            nhsel < (rt)->rt_nhn;                                      \
+            nh++, nhsel++)
+
+#define endfor_nexthops(rt) }
+
  static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
  {
         return (struct mpls_shim_hdr *)skb_network_header(skb);
@@ -52,8 +100,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *
  
  int nla_put_labels(struct sk_buff *skb, int attrtype,  u8 labels,
                    const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels,
+int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
                    u32 label[]);
+int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
+               u8 via[]);
  bool mpls_output_possible(const struct net_device *dev);
  unsigned int mpls_dev_mtu(const struct net_device *dev);
  bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
author	David S. Miller <davem@davemloft.net>
	Fri, 23 Oct 2015 13:58:09 +0000 (06:58 -0700)
committer	David S. Miller <davem@davemloft.net>
	Fri, 23 Oct 2015 13:58:09 +0000 (06:58 -0700)
drivers/net/dsa/mv88e6060.c		patch \| blob \| history
drivers/net/dsa/mv88e6xxx.c		patch \| blob \| history
drivers/net/ethernet/ti/cpsw.c		patch \| blob \| history
drivers/net/phy/mdio_bus.c		patch \| blob \| history
include/linux/phy.h		patch \| blob \| history
include/net/inet_connection_sock.h		patch \| blob \| history
include/net/inet_hashtables.h		patch \| blob \| history
include/net/mpls_iptunnel.h		patch \| blob \| history
include/net/tcp.h		patch \| blob \| history
net/dccp/dccp.h		patch \| blob \| history
net/dccp/ipv4.c		patch \| blob \| history
net/dccp/ipv6.c		patch \| blob \| history
net/dccp/minisocks.c		patch \| blob \| history
net/ipv4/inet_connection_sock.c		patch \| blob \| history
net/ipv4/inet_hashtables.c		patch \| blob \| history
net/ipv4/syncookies.c		patch \| blob \| history
net/ipv4/tcp_fastopen.c		patch \| blob \| history
net/ipv4/tcp_ipv4.c		patch \| blob \| history
net/ipv4/tcp_minisocks.c		patch \| blob \| history
net/ipv6/tcp_ipv6.c		patch \| blob \| history
net/mpls/af_mpls.c		patch \| blob \| history
net/mpls/internal.h		patch \| blob \| history