--- /dev/null
+* Altera Triple-Speed Ethernet MAC driver (TSE)
+
+Required properties:
+- compatible: Should be "altr,tse-1.0" for legacy SGDMA based TSE, and should
+ be "altr,tse-msgdma-1.0" for the preferred MSGDMA based TSE.
+ The "ALTR" vendor prefix is supported for legacy device trees, but is
+ deprecated; "altr" should be used for all new designs.
+- reg: Address and length of the register set for the device. It contains
+ the register regions in the same order as listed in reg-names
+- reg-names: Should contain the reg names
+ "control_port": MAC configuration space region
+ "tx_csr": xDMA Tx dispatcher control and status space region
+ "tx_desc": MSGDMA Tx dispatcher descriptor space region
+ "rx_csr" : xDMA Rx dispatcher control and status space region
+ "rx_desc": MSGDMA Rx dispatcher descriptor space region
+ "rx_resp": MSGDMA Rx dispatcher response space region
+ "s1": SGDMA descriptor memory
+- interrupts: Should contain the TSE interrupts and their modes.
+- interrupt-names: Should contain the interrupt names
+ "rx_irq": xDMA Rx dispatcher interrupt
+ "tx_irq": xDMA Tx dispatcher interrupt
+- rx-fifo-depth: MAC receive FIFO buffer depth in bytes
+- tx-fifo-depth: MAC transmit FIFO buffer depth in bytes
+- phy-mode: See ethernet.txt in the same directory.
+- phy-handle: See ethernet.txt in the same directory.
+- phy-addr: See ethernet.txt in the same directory. A configuration must
+ include either phy-handle or phy-addr.
+- altr,has-supplementary-unicast:
+ If present, TSE supports additional unicast addresses.
+ Otherwise additional unicast addresses are not supported.
+- altr,has-hash-multicast-filter:
+ If present, TSE supports a hash based multicast filter.
+ Otherwise, hash-based multicast filtering is not supported.
+
+- mdio device tree subnode: When the TSE has a phy connected to its local
+ mdio, there must be a device tree subnode with the following
+ required properties:
+
+ - compatible: Must be "altr,tse-mdio".
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
+
+ For each phy on the mdio bus, there must be a node with the following
+ fields:
+
+ - reg: PHY address used to communicate with the PHY.
+ - device_type: Must be "ethernet-phy".
+
+Optional properties:
+- local-mac-address: See ethernet.txt in the same directory.
+- max-frame-size: See ethernet.txt in the same directory.
+
+Example:
+
+ tse_sub_0_eth_tse_0: ethernet@0x1,00000000 {
+ compatible = "altr,tse-msgdma-1.0";
+ reg = <0x00000001 0x00000000 0x00000400>,
+ <0x00000001 0x00000460 0x00000020>,
+ <0x00000001 0x00000480 0x00000020>,
+ <0x00000001 0x000004A0 0x00000008>,
+ <0x00000001 0x00000400 0x00000020>,
+ <0x00000001 0x00000420 0x00000020>;
+ reg-names = "control_port", "rx_csr", "rx_desc", "rx_resp", "tx_csr", "tx_desc";
+ interrupt-parent = <&hps_0_arm_gic_0>;
+ interrupts = <0 41 4>, <0 40 4>;
+ interrupt-names = "rx_irq", "tx_irq";
+ rx-fifo-depth = <2048>;
+ tx-fifo-depth = <2048>;
+ address-bits = <48>;
+ max-frame-size = <1500>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ phy-mode = "gmii";
+ altr,has-supplementary-unicast;
+ altr,has-hash-multicast-filter;
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "altr,tse-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <0x0>;
+ device_type = "ethernet-phy";
+ };
+
+ phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ device_type = "ethernet-phy";
+ };
+
+ };
+ };
+
+ tse_sub_1_eth_tse_0: ethernet@0x1,00001000 {
+ compatible = "altr,tse-msgdma-1.0";
+ reg = <0x00000001 0x00001000 0x00000400>,
+ <0x00000001 0x00001460 0x00000020>,
+ <0x00000001 0x00001480 0x00000020>,
+ <0x00000001 0x000014A0 0x00000008>,
+ <0x00000001 0x00001400 0x00000020>,
+ <0x00000001 0x00001420 0x00000020>;
+ reg-names = "control_port", "rx_csr", "rx_desc", "rx_resp", "tx_csr", "tx_desc";
+ interrupt-parent = <&hps_0_arm_gic_0>;
+ interrupts = <0 43 4>, <0 42 4>;
+ interrupt-names = "rx_irq", "tx_irq";
+ rx-fifo-depth = <2048>;
+ tx-fifo-depth = <2048>;
+ address-bits = <48>;
+ max-frame-size = <1500>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ phy-mode = "gmii";
+ altr,has-supplementary-unicast;
+ altr,has-hash-multicast-filter;
+ phy-handle = <&phy1>;
+ };
--- /dev/null
+ Altera Triple-Speed Ethernet MAC driver
+
+Copyright (C) 2008-2014 Altera Corporation
+
+This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers
+using the SGDMA and MSGDMA soft DMA IP components. The driver uses the
+platform bus to obtain component resources. The designs used to test this
+driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board,
+and tested with ARM and NIOS processor hosts separately. The anticipated use
+cases are simple communications between an embedded system and an external peer
+for status and simple configuration of the embedded system.
+
+For more information visit www.altera.com and www.rocketboards.org. Support
+forums for the driver may be found on www.rocketboards.org, and a design used
+to test this driver may be found there as well. Support is also available from
+the maintainer of this driver, found in MAINTAINERS.
+
+The Triple-Speed Ethernet, SGDMA, and MSGDMA components are all soft IP
+components that can be assembled and built into an FPGA using the Altera
+Quartus toolchain. Quartus 13.1 and 14.0 were used to build the design that
+this driver was tested against. The sopc2dts tool is used to create the
+device tree for the driver, and may be found at rocketboards.org.
+
+The driver probe function examines the device tree and determines if the
+Triple-Speed Ethernet instance is using an SGDMA or MSGDMA component. The
+probe function then installs the appropriate set of DMA routines to
+handle initialization, transmit, receive, and interrupt primitives for
+the respective configuration.
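+
+As a rough sketch of this dispatch (the names below are illustrative and
+not necessarily the driver's exact internals), the probe path effectively
+selects between two sets of function pointers:
+
+    /* illustrative sketch only - see altera_tse_main.c for the real code */
+    struct altera_dmaops {
+        void (*reset_dma)(struct altera_tse_private *priv);
+        void (*enable_rxirq)(struct altera_tse_private *priv);
+        int (*tx_buffer)(struct altera_tse_private *priv,
+                         struct tse_buffer *buffer);
+        u32 (*get_rx_status)(struct altera_tse_private *priv);
+        /* ... */
+    };
+
+    if (of_device_is_compatible(np, "altr,tse-msgdma-1.0"))
+        priv->dmaops = &msgdma_ops;    /* the msgdma_* routines */
+    else
+        priv->dmaops = &sgdma_ops;     /* the sgdma_* routines */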
+
+The SGDMA component is to be deprecated in the near future (over the next 1-2
+years as of this writing in early 2014) in favor of the MSGDMA component.
+SGDMA support is included for existing designs, and as a reference for
+developers who wish to integrate their own soft DMA logic and driver
+support. Any new designs should not use the SGDMA.
+
+The SGDMA supports only a single transmit or receive operation at a time, and
+therefore will not perform as well as the MSGDMA soft IP. Please
+visit www.altera.com for known, documented SGDMA errata.
+
+Scatter-gather DMA is not supported by the SGDMA or MSGDMA at this time.
+Scatter-gather DMA will be added to a future maintenance update to this
+driver.
+
+Jumbo frames are not supported at this time.
+
+The driver limits PHY operations to 10/100Mbps, and has not yet been fully
+tested for 1Gbps. This support will be added in a future maintenance update.
+
+1) Kernel Configuration
+The kernel configuration option is ALTERA_TSE:
+ Device Drivers ---> Network device support ---> Ethernet driver support --->
+ Altera Triple-Speed Ethernet MAC support (ALTERA_TSE)
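+
+For example, to build the driver as a loadable module, set:
+  CONFIG_ALTERA_TSE=m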
+
+2) Driver parameters list:
+ debug: message level (0: no output, 16: all);
+ dma_rx_num: Number of descriptors in the RX list (default is 64);
+ dma_tx_num: Number of descriptors in the TX list (default is 64).
+
+3) Command line options
+Driver parameters can also be passed on the command line:
+ altera_tse=dma_rx_num:128,dma_tx_num:512
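+
+When the driver is built as a module, the equivalent parameters can be
+passed with the standard module parameter syntax, e.g.:
+  modprobe altera_tse dma_rx_num=128 dma_tx_num=512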
+
+4) Driver information and notes
+
+4.1) Transmit process
+When the driver's transmit routine is called by the kernel, it sets up a
+transmit descriptor by calling the underlying DMA transmit routine (SGDMA or
+MSGDMA), and initiates a transmit operation. Once the transmit is complete, an
+interrupt is driven by the transmit DMA logic. The driver handles the transmit
+completion in the context of the interrupt handling chain by recycling the
+resources required to send and track the requested transmit operation.
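+
+A minimal sketch of this flow, with illustrative names (the real code
+lives in altera_tse_main.c):
+
+    /* map the packet for DMA and hand it to the installed tx routine */
+    buffer->dma_addr = dma_map_single(priv->device, skb->data,
+                                      skb->len, DMA_TO_DEVICE);
+    buffer->len = skb->len;
+    priv->dmaops->tx_buffer(priv, buffer);  /* e.g. msgdma_tx_buffer() */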
+
+4.2) Receive process
+The driver will post receive buffers to the receive DMA logic during driver
+initialization. Receive buffers may or may not be queued, depending upon
+the underlying DMA logic (the MSGDMA is able to queue receive buffers to
+its receive logic, while the SGDMA is not). When a packet is
+received, the DMA logic generates an interrupt. The driver handles a receive
+interrupt by obtaining the DMA receive logic status and reaping receive
+completions until no more receive completions are available.
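+
+The DMA status word packs the packet status in the upper 16 bits and the
+length in the lower 16 bits (see msgdma_rx_status() and sgdma_rx_status()),
+so a sketch of the reaping loop, with illustrative names, is:
+
+    u32 rxstatus;
+
+    while ((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) {
+        u16 pktlength = rxstatus & 0xffff;  /* lower 16 bits */
+        u16 pktstatus = rxstatus >> 16;     /* upper 16 bits */
+        /* deliver the packet to the stack, then repost the buffer */
+    }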
+
+4.3) Interrupt Mitigation
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for receive operations. Interrupt mitigation is not yet supported
+for transmit operations, but will be added in a future maintenance release.
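+
+A hedged sketch of the NAPI receive path (illustrative only; the driver's
+actual poll routine is in altera_tse_main.c):
+
+    static int tse_poll_sketch(struct napi_struct *napi, int budget)
+    {
+        struct altera_tse_private *priv =
+            container_of(napi, struct altera_tse_private, napi);
+        int rxcomplete = tse_rx(priv, budget); /* reap up to budget */
+
+        if (rxcomplete < budget) {
+            napi_complete(napi);
+            priv->dmaops->enable_rxirq(priv);
+        }
+        return rxcomplete;
+    }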
+
+4.4) Ethtool support
+Ethtool is supported. Driver statistics and internal errors can be read with
+the "ethtool -S ethX" command. It is also possible to dump the registers with
+"ethtool -d ethX".
+
+4.5) PHY Support
+The driver uses the kernel's PHY Abstraction Layer (PAL) to work with PHY
+and GPHY devices.
+
+4.6) List of source files:
+ o Kconfig
+ o Makefile
+ o altera_tse_main.c: main network device driver
+ o altera_tse_ethtool.c: ethtool support
+ o altera_tse.h: private driver structure and common definitions
+ o altera_msgdma.h: MSGDMA implementation function definitions
+ o altera_sgdma.h: SGDMA implementation function definitions
+ o altera_msgdma.c: MSGDMA implementation
+ o altera_sgdma.c: SGDMA implementation
+ o altera_sgdmahw.h: SGDMA register and descriptor definitions
+ o altera_msgdmahw.h: MSGDMA register and descriptor definitions
+ o altera_utils.c: Driver utility functions
+ o altera_utils.h: Driver utility function definitions
+
+5) Debug Information
+
+The driver exports debug information such as internal statistics and the
+MAC and DMA registers.
+
+A user may use the ethtool support to get statistics,
+e.g. using: ethtool -S ethX (which shows the statistics counters),
+or to see the MAC registers, e.g. using: ethtool -d ethX
+
+The developer can also use the "debug" module parameter to get
+further debug information.
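+
+For example, to load the module with all messages enabled:
+  modprobe altera_tse debug=16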
+
+6) Statistics Support
+
+The controller and driver support a mix of IEEE standard defined statistics,
+RFC defined statistics, and driver or Altera defined statistics. The four
+specifications containing the standard definitions for these statistics are
+as follows:
+
+ o IEEE 802.3-2012 - IEEE Standard for Ethernet.
+ o RFC 2863 found at http://www.rfc-editor.org/rfc/rfc2863.txt.
+ o RFC 2819 found at http://www.rfc-editor.org/rfc/rfc2819.txt.
+ o Altera Triple Speed Ethernet User Guide, found at http://www.altera.com
+
+The statistics supported by the TSE and the device driver are as follows:
+
+"tx_packets" is equivalent to aFramesTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.2. This statistic is the count of frames that are successfully
+transmitted.
+
+"rx_packets" is equivalent to aFramesReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.5. This statistic is the count of frames that are successfully
+received. This count does not include any error packets such as CRC errors,
+length errors, or alignment errors.
+
+"rx_crc_errors" is equivalent to aFrameCheckSequenceErrors defined in IEEE
+802.3-2012, Section 5.2.2.1.6. This statistic is the count of frames that are
+an integral number of bytes in length and do not pass the CRC test as the frame
+is received.
+
+"rx_align_errors" is equivalent to aAlignmentErrors defined in IEEE 802.3-2012,
+Section 5.2.2.1.7. This statistic is the count of frames that are not an
+integral number of bytes in length and do not pass the CRC test as the frame is
+received.
+
+"tx_bytes" is equivalent to aOctetsTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.8. This statistic is the count of data and pad bytes
+successfully transmitted from the interface.
+
+"rx_bytes" is equivalent to aOctetsReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.14. This statistic is the count of data and pad bytes
+successfully received by the controller.
+
+"tx_pause" is equivalent to aPAUSEMACCtrlFramesTransmitted defined in IEEE
+802.3-2012, Section 30.3.4.2. This statistic is a count of PAUSE frames
+transmitted from the network controller.
+
+"rx_pause" is equivalent to aPAUSEMACCtrlFramesReceived defined in IEEE
+802.3-2012, Section 30.3.4.3. This statistic is a count of PAUSE frames
+received by the network controller.
+
+"rx_errors" is equivalent to ifInErrors defined in RFC 2863. This statistic is
+a count of the number of packets received containing errors that prevented the
+packet from being delivered to a higher level protocol.
+
+"tx_errors" is equivalent to ifOutErrors defined in RFC 2863. This statistic
+is a count of the number of packets that could not be transmitted due to errors.
+
+"rx_unicast" is equivalent to ifInUcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were not addressed
+to the broadcast address or a multicast group.
+
+"rx_multicast" is equivalent to ifInMulticastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+a multicast address group.
+
+"rx_broadcast" is equivalent to ifInBroadcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+the broadcast address.
+
+"tx_discards" is equivalent to ifOutDiscards defined in RFC 2863. This
+statistic is the number of outbound packets not transmitted even though an
+error was not detected. An example of a reason this might occur is to free up
+internal buffer space.
+
+"tx_unicast" is equivalent to ifOutUcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were not addressed to
+a multicast group or broadcast address.
+
+"tx_multicast" is equivalent to ifOutMulticastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+multicast group.
+
+"tx_broadcast" is equivalent to ifOutBroadcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+broadcast address.
+
+"ether_drops" is equivalent to etherStatsDropEvents defined in RFC 2819.
+This statistic counts the number of packets dropped due to lack of internal
+controller resources.
+
+"rx_total_bytes" is equivalent to etherStatsOctets defined in RFC 2819.
+This statistic counts the total number of bytes received by the controller,
+including error and discarded packets.
+
+"rx_total_packets" is equivalent to etherStatsPkts defined in RFC 2819.
+This statistic counts the total number of packets received by the controller,
+including error, discarded, unicast, multicast, and broadcast packets.
+
+"rx_undersize" is equivalent to etherStatsUndersizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets received less
+than 64 bytes long.
+
+"rx_oversize" is equivalent to etherStatsOversizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets greater than 1518
+bytes long.
+
+"rx_64_bytes" is equivalent to etherStatsPkts64Octets defined in RFC 2819.
+This statistic counts the total number of packets received that were 64 octets
+in length.
+
+"rx_65_127_bytes" is equivalent to etherStatsPkts65to127Octets defined in RFC
+2819. This statistic counts the total number of packets received that were
+between 65 and 127 octets in length inclusive.
+
+"rx_128_255_bytes" is equivalent to etherStatsPkts128to255Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 128 and 255 octets in length inclusive.
+
+"rx_256_511_bytes" is equivalent to etherStatsPkts256to511Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 256 and 511 octets in length inclusive.
+
+"rx_512_1023_bytes" is equivalent to etherStatsPkts512to1023Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 512 and 1023 octets in length inclusive.
+
+"rx_1024_1518_bytes" is equivalent to etherStatsPkts1024to1518Octets define
+in RFC 2819. This statistic is the total number of packets received that were
+between 1024 and 1518 octets in length inclusive.
+
+"rx_gte_1519_bytes" is a statistic defined specific to the behavior of the
+Altera TSE. This statistic counts the number of received good and errored
+frames between 1519 bytes and the maximum frame length configured in the
+frm_length register. See the Altera TSE User Guide for more details.
+
+"rx_jabbers" is equivalent to etherStatsJabbers defined in RFC 2819. This
+statistic is the total number of packets received that were longer than 1518
+octets, and had either a bad CRC with an integral number of octets (CRC Error)
+or a bad CRC with a non-integral number of octets (Alignment Error).
+
+"rx_runts" is equivalent to etherStatsFragments defined in RFC 2819. This
+statistic is the total number of packets received that were less than 64 octets
+in length and had either a bad CRC with an integral number of octets (CRC
+error) or a bad CRC with a non-integral number of octets (Alignment Error).
This parameter adds support for SR-IOV. It causes the driver to spawn up to
max_vfs worth of virtual functions.
-QueuePairs
-----------
-Valid Range: 0-1
-Default Value: 1 (TX and RX will be paired onto one interrupt vector)
-
-If set to 0, when MSI-X is enabled, the TX and RX will attempt to occupy
-separate vectors.
-
-This option can be overridden to 1 if there are not sufficient interrupts
-available. This can occur if any combination of RSS, VMDQ, and max_vfs
-results in more than 4 queues being used.
-
-Node
-----
-Valid Range: 0-n
-Default Value: -1 (off)
-
- 0 - n: where n is the number of the NUMA node that should be used to
- allocate memory for this adapter port.
- -1: uses the driver default of allocating memory on whichever processor is
- running insmod/modprobe.
-
- The Node parameter will allow you to pick which NUMA node you want to have
- the adapter allocate memory from. All driver structures, in-memory queues,
- and receive buffers will be allocated on the node specified. This parameter
- is only useful when interrupt affinity is specified, otherwise some portion
- of the time the interrupt could run on a different core than the memory is
- allocated on, causing slower memory access and impacting throughput, CPU, or
- both.
-
-EEE
----
-Valid Range: 0-1
-Default Value: 1 (enabled)
-
- A link between two EEE-compliant devices will result in periodic bursts of
- data followed by long periods where in the link is in an idle state. This Low
- Power Idle (LPI) state is supported in both 1Gbps and 100Mbps link speeds.
- NOTE: EEE support requires autonegotiation.
-
-DMAC
-----
-Valid Range: 0-1
-Default Value: 1 (enabled)
- Enables or disables DMA Coalescing feature.
-
-
-
Additional Configurations
=========================
and not free the skb. A driver not supporting hardware time stamping doesn't
do that. A driver must never touch sk_buff::tstamp! It is used to store
software generated time stamps by the network subsystem.
+- Driver should call skb_tx_timestamp() as close to passing sk_buff to hardware
+ as possible. skb_tx_timestamp() provides a software time stamp if requested
+ and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set).
- As soon as the driver has sent the packet and/or obtained a
hardware time stamp for it, it passes the time stamp back by
calling skb_hwtstamp_tx() with the original skb, the raw
this would occur at a later time in the processing pipeline than other
software time stamping and therefore could lead to unexpected deltas
between time stamps.
-- If the driver did not set the SKBTX_IN_PROGRESS flag (see above), then
- dev_hard_start_xmit() checks whether software time stamping
- is wanted as fallback and potentially generates the time stamp.
L: linux-alpha@vger.kernel.org
F: arch/alpha/
+ALTERA TRIPLE SPEED ETHERNET DRIVER
+M: Vince Bridgers <vbridgers2013@gmail.com>
+L: netdev@vger.kernel.org
+L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
+S: Maintained
+F: drivers/net/ethernet/altera/
+
ALTERA UART/JTAG UART SERIAL DRIVERS
M: Tobias Klauser <tklauser@distanz.ch>
L: linux-serial@vger.kernel.org
config NETPOLL
def_bool NETCONSOLE
-config NETPOLL_TRAP
- bool "Netpoll traffic trapping"
- default n
- depends on NETPOLL
-
config NET_POLL_CONTROLLER
def_bool NETPOLL
/* meaning: the port was related to an aggregator
* but was not on the aggregator port list
*/
- pr_warn("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n",
- port->slave->bond->dev->name,
- port->actor_port_number,
- port->slave->dev->name,
- port->aggregator->aggregator_identifier);
+ pr_warn_ratelimited("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n",
+ port->slave->bond->dev->name,
+ port->actor_port_number,
+ port->slave->dev->name,
+ port->aggregator->aggregator_identifier);
}
}
/* search on all aggregators for a suitable aggregator for this port */
break;
default:
- pr_warn("%s: Impossible agg select mode %d\n",
- curr->slave->bond->dev->name,
- __get_agg_selection_mode(curr->lag_ports));
+ pr_warn_ratelimited("%s: Impossible agg select mode %d\n",
+ curr->slave->bond->dev->name,
+ __get_agg_selection_mode(curr->lag_ports));
break;
}
 /* check if any partner replies */
if (best->is_individual) {
- pr_warn("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n",
- best->slave ?
- best->slave->bond->dev->name : "NULL");
+ pr_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n",
+ best->slave ?
+ best->slave->bond->dev->name : "NULL");
}
best->is_active = 1;
/* select the active aggregator for the bond */
if (port) {
if (!port->slave) {
- pr_warn("%s: Warning: bond's first port is uninitialized\n",
- bond->dev->name);
+ pr_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n",
+ bond->dev->name);
goto re_arm;
}
bond_for_each_slave_rcu(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave).port);
if (!port->slave) {
- pr_warn("%s: Warning: Found an uninitialized port\n",
- bond->dev->name);
+ pr_warn_ratelimited("%s: Warning: Found an uninitialized port\n",
+ bond->dev->name);
goto re_arm;
}
port = &(SLAVE_AD_INFO(slave).port);
if (!port->slave) {
- pr_warn("%s: Warning: port of slave %s is uninitialized\n",
- slave->dev->name, slave->bond->dev->name);
+ pr_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n",
+ slave->dev->name, slave->bond->dev->name);
return ret;
}
source "drivers/net/ethernet/aeroflex/Kconfig"
source "drivers/net/ethernet/allwinner/Kconfig"
source "drivers/net/ethernet/alteon/Kconfig"
+source "drivers/net/ethernet/altera/Kconfig"
source "drivers/net/ethernet/amd/Kconfig"
source "drivers/net/ethernet/apple/Kconfig"
source "drivers/net/ethernet/arc/Kconfig"
obj-$(CONFIG_GRETH) += aeroflex/
obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
+obj-$(CONFIG_ALTERA_TSE) += altera/
obj-$(CONFIG_NET_VENDOR_AMD) += amd/
obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
obj-$(CONFIG_NET_VENDOR_ARC) += arc/
--- /dev/null
+config ALTERA_TSE
+ tristate "Altera Triple-Speed Ethernet MAC support"
+ select PHYLIB
+ ---help---
+ This driver supports the Altera Triple-Speed (TSE) Ethernet MAC.
+
+ To compile this driver as a module, choose M here. The module
+ will be called altera_tse.
--- /dev/null
+#
+# Makefile for the Altera device drivers.
+#
+
+obj-$(CONFIG_ALTERA_TSE) += altera_tse.o
+altera_tse-objs := altera_tse_main.o altera_tse_ethtool.o \
+altera_msgdma.o altera_sgdma.o altera_utils.o
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/netdevice.h>
+#include "altera_utils.h"
+#include "altera_tse.h"
+#include "altera_msgdmahw.h"
+
+/* No initialization work to do for MSGDMA */
+int msgdma_initialize(struct altera_tse_private *priv)
+{
+ return 0;
+}
+
+void msgdma_uninitialize(struct altera_tse_private *priv)
+{
+}
+
+void msgdma_reset(struct altera_tse_private *priv)
+{
+ int counter;
+ struct msgdma_csr *txcsr =
+ (struct msgdma_csr *)priv->tx_dma_csr;
+ struct msgdma_csr *rxcsr =
+ (struct msgdma_csr *)priv->rx_dma_csr;
+
+ /* Reset Rx mSGDMA */
+ iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status);
+ iowrite32(MSGDMA_CSR_CTL_RESET, &rxcsr->control);
+
+ counter = 0;
+ while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) {
+ if (tse_bit_is_clear(&rxcsr->status,
+ MSGDMA_CSR_STAT_RESETTING))
+ break;
+ udelay(1);
+ }
+
+ if (counter >= ALTERA_TSE_SW_RESET_WATCHDOG_CNTR)
+ netif_warn(priv, drv, priv->dev,
+ "TSE Rx mSGDMA resetting bit never cleared!\n");
+
+ /* clear all status bits */
+ iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status);
+
+ /* Reset Tx mSGDMA */
+ iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status);
+ iowrite32(MSGDMA_CSR_CTL_RESET, &txcsr->control);
+
+ counter = 0;
+ while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) {
+ if (tse_bit_is_clear(&txcsr->status,
+ MSGDMA_CSR_STAT_RESETTING))
+ break;
+ udelay(1);
+ }
+
+ if (counter >= ALTERA_TSE_SW_RESET_WATCHDOG_CNTR)
+ netif_warn(priv, drv, priv->dev,
+ "TSE Tx mSGDMA resetting bit never cleared!\n");
+
+ /* clear all status bits */
+ iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status);
+}
+
+void msgdma_disable_rxirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->rx_dma_csr;
+ tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR);
+}
+
+void msgdma_enable_rxirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->rx_dma_csr;
+ tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR);
+}
+
+void msgdma_disable_txirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->tx_dma_csr;
+ tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR);
+}
+
+void msgdma_enable_txirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->tx_dma_csr;
+ tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR);
+}
+
+void msgdma_clear_rxirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->rx_dma_csr;
+ iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status);
+}
+
+void msgdma_clear_txirq(struct altera_tse_private *priv)
+{
+ struct msgdma_csr *csr = priv->tx_dma_csr;
+ iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status);
+}
+
+/* return 0 to indicate transmit is pending */
+int msgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer)
+{
+ struct msgdma_extended_desc *desc = priv->tx_dma_desc;
+
+ iowrite32(lower_32_bits(buffer->dma_addr), &desc->read_addr_lo);
+ iowrite32(upper_32_bits(buffer->dma_addr), &desc->read_addr_hi);
+ iowrite32(0, &desc->write_addr_lo);
+ iowrite32(0, &desc->write_addr_hi);
+ iowrite32(buffer->len, &desc->len);
+ iowrite32(0, &desc->burst_seq_num);
+ iowrite32(MSGDMA_DESC_TX_STRIDE, &desc->stride);
+ iowrite32(MSGDMA_DESC_CTL_TX_SINGLE, &desc->control);
+ return 0;
+}
+
+u32 msgdma_tx_completions(struct altera_tse_private *priv)
+{
+ u32 ready = 0;
+ u32 inuse;
+ u32 status;
+ struct msgdma_csr *txcsr =
+ (struct msgdma_csr *)priv->tx_dma_csr;
+
+ /* Get number of sent descriptors */
+ inuse = ioread32(&txcsr->rw_fill_level) & 0xffff;
+
+ if (inuse) { /* Tx FIFO is not empty */
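+ /* Descriptors between the consumer index and the oldest descriptor
+ * still in the dispatcher FIFO have completed; the extra "- 1"
+ * conservatively excludes the descriptor the dispatcher may
+ * currently be processing.
+ */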
+ ready = priv->tx_prod - priv->tx_cons - inuse - 1;
+ } else {
+ /* Check for buffered last packet */
+ status = ioread32(&txcsr->status);
+ if (status & MSGDMA_CSR_STAT_BUSY)
+ ready = priv->tx_prod - priv->tx_cons - 1;
+ else
+ ready = priv->tx_prod - priv->tx_cons;
+ }
+ return ready;
+}
+
+/* Put buffer to the mSGDMA RX FIFO
+ */
+int msgdma_add_rx_desc(struct altera_tse_private *priv,
+ struct tse_buffer *rxbuffer)
+{
+ struct msgdma_extended_desc *desc = priv->rx_dma_desc;
+ u32 len = priv->rx_dma_buf_sz;
+ dma_addr_t dma_addr = rxbuffer->dma_addr;
+ u32 control = (MSGDMA_DESC_CTL_END_ON_EOP
+ | MSGDMA_DESC_CTL_END_ON_LEN
+ | MSGDMA_DESC_CTL_TR_COMP_IRQ
+ | MSGDMA_DESC_CTL_EARLY_IRQ
+ | MSGDMA_DESC_CTL_TR_ERR_IRQ
+ | MSGDMA_DESC_CTL_GO);
+
+ iowrite32(0, &desc->read_addr_lo);
+ iowrite32(0, &desc->read_addr_hi);
+ iowrite32(lower_32_bits(dma_addr), &desc->write_addr_lo);
+ iowrite32(upper_32_bits(dma_addr), &desc->write_addr_hi);
+ iowrite32(len, &desc->len);
+ iowrite32(0, &desc->burst_seq_num);
+ iowrite32(MSGDMA_DESC_RX_STRIDE, &desc->stride);
+ iowrite32(control, &desc->control);
+ return 1;
+}
+
+/* status is returned on upper 16 bits,
+ * length is returned in lower 16 bits
+ */
+u32 msgdma_rx_status(struct altera_tse_private *priv)
+{
+ u32 rxstatus = 0;
+ u32 pktlength;
+ u32 pktstatus;
+ struct msgdma_csr *rxcsr =
+ (struct msgdma_csr *)priv->rx_dma_csr;
+ struct msgdma_response *rxresp =
+ (struct msgdma_response *)priv->rx_dma_resp;
+
+ if (ioread32(&rxcsr->resp_fill_level) & 0xffff) {
+ pktlength = ioread32(&rxresp->bytes_transferred);
+ pktstatus = ioread32(&rxresp->status);
+ rxstatus = pktstatus;
+ rxstatus = rxstatus << 16;
+ rxstatus |= (pktlength & 0xffff);
+ }
+ return rxstatus;
+}
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_MSGDMA_H__
+#define __ALTERA_MSGDMA_H__
+
+void msgdma_reset(struct altera_tse_private *);
+void msgdma_enable_txirq(struct altera_tse_private *);
+void msgdma_enable_rxirq(struct altera_tse_private *);
+void msgdma_disable_rxirq(struct altera_tse_private *);
+void msgdma_disable_txirq(struct altera_tse_private *);
+void msgdma_clear_rxirq(struct altera_tse_private *);
+void msgdma_clear_txirq(struct altera_tse_private *);
+u32 msgdma_tx_completions(struct altera_tse_private *);
+int msgdma_add_rx_desc(struct altera_tse_private *, struct tse_buffer *);
+int msgdma_tx_buffer(struct altera_tse_private *, struct tse_buffer *);
+u32 msgdma_rx_status(struct altera_tse_private *);
+int msgdma_initialize(struct altera_tse_private *);
+void msgdma_uninitialize(struct altera_tse_private *);
+
+#endif /* __ALTERA_MSGDMA_H__ */
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_MSGDMAHW_H__
+#define __ALTERA_MSGDMAHW_H__
+
+/* mSGDMA standard descriptor format
+ */
+struct msgdma_desc {
+ u32 read_addr; /* data buffer source address */
+ u32 write_addr; /* data buffer destination address */
+ u32 len; /* the number of bytes to transfer per descriptor */
+ u32 control; /* characteristics of the transfer */
+};
+
+/* mSGDMA extended descriptor format
+ */
+struct msgdma_extended_desc {
+ u32 read_addr_lo; /* data buffer source address low bits */
+ u32 write_addr_lo; /* data buffer destination address low bits */
+ u32 len; /* the number of bytes to transfer
+ * per descriptor
+ */
+ u32 burst_seq_num; /* bit 31:24 write burst
+ * bit 23:16 read burst
+ * bit 15:0 sequence number
+ */
+ u32 stride; /* bit 31:16 write stride
+ * bit 15:0 read stride
+ */
+ u32 read_addr_hi; /* data buffer source address high bits */
+ u32 write_addr_hi; /* data buffer destination address high bits */
+ u32 control; /* characteristics of the transfer */
+};
+
+/* mSGDMA descriptor control field bit definitions
+ */
+#define MSGDMA_DESC_CTL_SET_CH(x) ((x) & 0xff)
+#define MSGDMA_DESC_CTL_GEN_SOP BIT(8)
+#define MSGDMA_DESC_CTL_GEN_EOP BIT(9)
+#define MSGDMA_DESC_CTL_PARK_READS BIT(10)
+#define MSGDMA_DESC_CTL_PARK_WRITES BIT(11)
+#define MSGDMA_DESC_CTL_END_ON_EOP BIT(12)
+#define MSGDMA_DESC_CTL_END_ON_LEN BIT(13)
+#define MSGDMA_DESC_CTL_TR_COMP_IRQ BIT(14)
+#define MSGDMA_DESC_CTL_EARLY_IRQ BIT(15)
+#define MSGDMA_DESC_CTL_TR_ERR_IRQ (0xff << 16)
+#define MSGDMA_DESC_CTL_EARLY_DONE BIT(24)
+/* Writing '1' to the 'go' bit commits the entire descriptor into the
+ * descriptor FIFO(s)
+ */
+#define MSGDMA_DESC_CTL_GO BIT(31)
+
+/* Tx buffer control flags
+ */
+#define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \
+ MSGDMA_DESC_CTL_GEN_EOP | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \
+ MSGDMA_DESC_CTL_END_ON_LEN | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_EARLY_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+/* mSGDMA extended descriptor stride definitions
+ */
+#define MSGDMA_DESC_TX_STRIDE (0x00010001)
+#define MSGDMA_DESC_RX_STRIDE (0x00010001)
+
+/* mSGDMA dispatcher control and status register map
+ */
+struct msgdma_csr {
+ u32 status; /* Read/Clear */
+ u32 control; /* Read/Write */
+ u32 rw_fill_level; /* bit 31:16 - write fill level
+ * bit 15:0 - read fill level
+ */
+ u32 resp_fill_level; /* bit 15:0 */
+ u32 rw_seq_num; /* bit 31:16 - write sequence number
+ * bit 15:0 - read sequence number
+ */
+ u32 pad[3]; /* reserved */
+};
+
+/* mSGDMA CSR status register bit definitions
+ */
+#define MSGDMA_CSR_STAT_BUSY BIT(0)
+#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY BIT(1)
+#define MSGDMA_CSR_STAT_DESC_BUF_FULL BIT(2)
+#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY BIT(3)
+#define MSGDMA_CSR_STAT_RESP_BUF_FULL BIT(4)
+#define MSGDMA_CSR_STAT_STOPPED BIT(5)
+#define MSGDMA_CSR_STAT_RESETTING BIT(6)
+#define MSGDMA_CSR_STAT_STOPPED_ON_ERR BIT(7)
+#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY BIT(8)
+#define MSGDMA_CSR_STAT_IRQ BIT(9)
+#define MSGDMA_CSR_STAT_MASK 0x3FF
+#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ 0x1FF
+
+#define MSGDMA_CSR_STAT_BUSY_GET(v) GET_BIT_VALUE(v, 0)
+#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY_GET(v) GET_BIT_VALUE(v, 1)
+#define MSGDMA_CSR_STAT_DESC_BUF_FULL_GET(v) GET_BIT_VALUE(v, 2)
+#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY_GET(v) GET_BIT_VALUE(v, 3)
+#define MSGDMA_CSR_STAT_RESP_BUF_FULL_GET(v) GET_BIT_VALUE(v, 4)
+#define MSGDMA_CSR_STAT_STOPPED_GET(v) GET_BIT_VALUE(v, 5)
+#define MSGDMA_CSR_STAT_RESETTING_GET(v) GET_BIT_VALUE(v, 6)
+#define MSGDMA_CSR_STAT_STOPPED_ON_ERR_GET(v) GET_BIT_VALUE(v, 7)
+#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY_GET(v) GET_BIT_VALUE(v, 8)
+#define MSGDMA_CSR_STAT_IRQ_GET(v) GET_BIT_VALUE(v, 9)
+
+/* mSGDMA CSR control register bit definitions
+ */
+#define MSGDMA_CSR_CTL_STOP BIT(0)
+#define MSGDMA_CSR_CTL_RESET BIT(1)
+#define MSGDMA_CSR_CTL_STOP_ON_ERR BIT(2)
+#define MSGDMA_CSR_CTL_STOP_ON_EARLY BIT(3)
+#define MSGDMA_CSR_CTL_GLOBAL_INTR BIT(4)
+#define MSGDMA_CSR_CTL_STOP_DESCS BIT(5)
+
+/* mSGDMA CSR fill level bits
+ */
+#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v) (((v) & 0xffff0000) >> 16)
+#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v) ((v) & 0x0000ffff)
+#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v) ((v) & 0x0000ffff)
+
+/* mSGDMA response register map
+ */
+struct msgdma_response {
+ u32 bytes_transferred;
+ u32 status;
+};
+
+/* mSGDMA response register bit definitions
+ */
+#define MSGDMA_RESP_EARLY_TERM BIT(8)
+#define MSGDMA_RESP_ERR_MASK 0xFF
+
+#endif /* __ALTERA_MSGDMAHW_H__ */
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/list.h>
+#include "altera_utils.h"
+#include "altera_tse.h"
+#include "altera_sgdmahw.h"
+#include "altera_sgdma.h"
+
+static void sgdma_descrip(struct sgdma_descrip *desc,
+ struct sgdma_descrip *ndesc,
+ dma_addr_t ndesc_phys,
+ dma_addr_t raddr,
+ dma_addr_t waddr,
+ u16 length,
+ int generate_eop,
+ int rfixed,
+ int wfixed);
+
+static int sgdma_async_write(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc);
+
+static int sgdma_async_read(struct altera_tse_private *priv);
+
+static dma_addr_t
+sgdma_txphysaddr(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc);
+
+static dma_addr_t
+sgdma_rxphysaddr(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc);
+
+static int sgdma_txbusy(struct altera_tse_private *priv);
+
+static int sgdma_rxbusy(struct altera_tse_private *priv);
+
+static void
+queue_tx(struct altera_tse_private *priv, struct tse_buffer *buffer);
+
+static void
+queue_rx(struct altera_tse_private *priv, struct tse_buffer *buffer);
+
+static struct tse_buffer *
+dequeue_tx(struct altera_tse_private *priv);
+
+static struct tse_buffer *
+dequeue_rx(struct altera_tse_private *priv);
+
+static struct tse_buffer *
+queue_rx_peekhead(struct altera_tse_private *priv);
+
+int sgdma_initialize(struct altera_tse_private *priv)
+{
+ priv->txctrlreg = SGDMA_CTRLREG_ILASTD;
+
+ priv->rxctrlreg = SGDMA_CTRLREG_IDESCRIP |
+ SGDMA_CTRLREG_ILASTD;
+
+ INIT_LIST_HEAD(&priv->txlisthd);
+ INIT_LIST_HEAD(&priv->rxlisthd);
+
+ priv->rxdescphys = (dma_addr_t) 0;
+ priv->txdescphys = (dma_addr_t) 0;
+
+ priv->rxdescphys = dma_map_single(priv->device, priv->rx_dma_desc,
+ priv->rxdescmem, DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(priv->device, priv->rxdescphys)) {
+ sgdma_uninitialize(priv);
+ netdev_err(priv->dev, "error mapping rx descriptor memory\n");
+ return -EINVAL;
+ }
+
+ priv->txdescphys = dma_map_single(priv->device, priv->tx_dma_desc,
+ priv->txdescmem, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(priv->device, priv->txdescphys)) {
+ sgdma_uninitialize(priv);
+ netdev_err(priv->dev, "error mapping tx descriptor memory\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void sgdma_uninitialize(struct altera_tse_private *priv)
+{
+ if (priv->rxdescphys)
+ dma_unmap_single(priv->device, priv->rxdescphys,
+ priv->rxdescmem, DMA_BIDIRECTIONAL);
+
+ if (priv->txdescphys)
+ dma_unmap_single(priv->device, priv->txdescphys,
+ priv->txdescmem, DMA_TO_DEVICE);
+}
+
+/* This function resets the SGDMA controller and clears the
+ * descriptor memory used for transmits and receives.
+ */
+void sgdma_reset(struct altera_tse_private *priv)
+{
+ u32 *ptxdescripmem = (u32 *)priv->tx_dma_desc;
+ u32 txdescriplen = priv->txdescmem;
+ u32 *prxdescripmem = (u32 *)priv->rx_dma_desc;
+ u32 rxdescriplen = priv->rxdescmem;
+ struct sgdma_csr *ptxsgdma = (struct sgdma_csr *)priv->tx_dma_csr;
+ struct sgdma_csr *prxsgdma = (struct sgdma_csr *)priv->rx_dma_csr;
+
+ /* Initialize descriptor memory to 0 */
+ memset(ptxdescripmem, 0, txdescriplen);
+ memset(prxdescripmem, 0, rxdescriplen);
+
+ iowrite32(SGDMA_CTRLREG_RESET, &ptxsgdma->control);
+ iowrite32(0, &ptxsgdma->control);
+
+ iowrite32(SGDMA_CTRLREG_RESET, &prxsgdma->control);
+ iowrite32(0, &prxsgdma->control);
+}
+
+void sgdma_enable_rxirq(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr;
+ priv->rxctrlreg |= SGDMA_CTRLREG_INTEN;
+ tse_set_bit(&csr->control, SGDMA_CTRLREG_INTEN);
+}
+
+void sgdma_enable_txirq(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr;
+ priv->txctrlreg |= SGDMA_CTRLREG_INTEN;
+ tse_set_bit(&csr->control, SGDMA_CTRLREG_INTEN);
+}
+
+/* for SGDMA, RX interrupts remain enabled after enabling */
+void sgdma_disable_rxirq(struct altera_tse_private *priv)
+{
+}
+
+/* for SGDMA, TX interrupts remain enabled after enabling */
+void sgdma_disable_txirq(struct altera_tse_private *priv)
+{
+}
+
+void sgdma_clear_rxirq(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr;
+ tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT);
+}
+
+void sgdma_clear_txirq(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr;
+ tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT);
+}
+
+/* transmits buffer through SGDMA. Returns number of buffers
+ * transmitted, 0 if not possible.
+ *
+ * tx_lock is held by the caller
+ */
+int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer)
+{
+ int pktstx = 0;
+ struct sgdma_descrip *descbase =
+ (struct sgdma_descrip *)priv->tx_dma_desc;
+
+ struct sgdma_descrip *cdesc = &descbase[0];
+ struct sgdma_descrip *ndesc = &descbase[1];
+
+ /* wait 'til the tx sgdma is ready for the next transmit request */
+ if (sgdma_txbusy(priv))
+ return 0;
+
+ sgdma_descrip(cdesc, /* current descriptor */
+ ndesc, /* next descriptor */
+ sgdma_txphysaddr(priv, ndesc),
+ buffer->dma_addr, /* address of packet to xmit */
+ 0, /* write addr 0 for tx dma */
+ buffer->len, /* length of packet */
+ SGDMA_CONTROL_EOP, /* Generate EOP */
+ 0, /* read fixed */
+ SGDMA_CONTROL_WR_FIXED); /* write fixed */
+
+ pktstx = sgdma_async_write(priv, cdesc);
+
+ /* enqueue the request to the pending transmit queue */
+ queue_tx(priv, buffer);
+
+ return 1;
+}
+
+
+/* tx_lock held to protect access to queued tx list
+ */
+u32 sgdma_tx_completions(struct altera_tse_private *priv)
+{
+ u32 ready = 0;
+ struct sgdma_descrip *desc = (struct sgdma_descrip *)priv->tx_dma_desc;
+
+ if (!sgdma_txbusy(priv) &&
+ ((desc->control & SGDMA_CONTROL_HW_OWNED) == 0) &&
+ (dequeue_tx(priv))) {
+ ready = 1;
+ }
+
+ return ready;
+}
+
+int sgdma_add_rx_desc(struct altera_tse_private *priv,
+ struct tse_buffer *rxbuffer)
+{
+ queue_rx(priv, rxbuffer);
+ return sgdma_async_read(priv);
+}
+
+/* status is returned on upper 16 bits,
+ * length is returned in lower 16 bits
+ */
+u32 sgdma_rx_status(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr;
+ struct sgdma_descrip *base = (struct sgdma_descrip *)priv->rx_dma_desc;
+ struct sgdma_descrip *desc = NULL;
+ int pktsrx;
+ unsigned int rxstatus = 0;
+ unsigned int pktlength = 0;
+ unsigned int pktstatus = 0;
+ struct tse_buffer *rxbuffer = NULL;
+
+ dma_sync_single_for_cpu(priv->device,
+ priv->rxdescphys,
+ priv->rxdescmem,
+ DMA_BIDIRECTIONAL);
+
+ desc = &base[0];
+ if ((ioread32(&csr->status) & SGDMA_STSREG_EOP) ||
+ (desc->status & SGDMA_STATUS_EOP)) {
+ pktlength = desc->bytes_xferred;
+ pktstatus = desc->status & 0x3f;
+ rxstatus = pktstatus;
+ rxstatus = rxstatus << 16;
+ rxstatus |= (pktlength & 0xffff);
+
+ desc->status = 0;
+
+ rxbuffer = dequeue_rx(priv);
+ if (rxbuffer == NULL)
+ netdev_err(priv->dev,
+ "sgdma rx and rx queue empty!\n");
+
+ /* kick the rx sgdma after reaping this descriptor */
+ pktsrx = sgdma_async_read(priv);
+ }
+
+ return rxstatus;
+}
+
+
+/* Private functions */
+static void sgdma_descrip(struct sgdma_descrip *desc,
+ struct sgdma_descrip *ndesc,
+ dma_addr_t ndesc_phys,
+ dma_addr_t raddr,
+ dma_addr_t waddr,
+ u16 length,
+ int generate_eop,
+ int rfixed,
+ int wfixed)
+{
+ /* Clear the next descriptor as not owned by hardware */
+ u32 ctrl = ndesc->control;
+ ctrl &= ~SGDMA_CONTROL_HW_OWNED;
+ ndesc->control = ctrl;
+
+ ctrl = SGDMA_CONTROL_HW_OWNED;
+ ctrl |= generate_eop;
+ ctrl |= rfixed;
+ ctrl |= wfixed;
+
+ /* Channel is implicitly zero, initialized to 0 by default */
+
+ desc->raddr = raddr;
+ desc->waddr = waddr;
+ desc->next = lower_32_bits(ndesc_phys);
+ desc->control = ctrl;
+ desc->status = 0;
+ desc->rburst = 0;
+ desc->wburst = 0;
+ desc->bytes = length;
+ desc->bytes_xferred = 0;
+}
+
+/* If hardware is busy, don't restart async read.
+ * If the status register is 0 - meaning initial state, restart the async
+ * read, probably for the first time when populating a receive buffer.
+ * If the status indicates the DMA is not busy but completion bits are set,
+ * restart the async DMA read.
+ */
+static int sgdma_async_read(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr;
+ struct sgdma_descrip *descbase =
+ (struct sgdma_descrip *)priv->rx_dma_desc;
+
+ struct sgdma_descrip *cdesc = &descbase[0];
+ struct sgdma_descrip *ndesc = &descbase[1];
+
+ unsigned int sts = ioread32(&csr->status);
+ struct tse_buffer *rxbuffer = NULL;
+
+ if (!sgdma_rxbusy(priv)) {
+ rxbuffer = queue_rx_peekhead(priv);
+ if (rxbuffer == NULL)
+ return 0;
+
+ sgdma_descrip(cdesc, /* current descriptor */
+ ndesc, /* next descriptor */
+ sgdma_rxphysaddr(priv, ndesc),
+ 0, /* read addr 0 for rx dma */
+ rxbuffer->dma_addr, /* write addr for rx dma */
+ 0, /* read 'til EOP */
+ 0, /* EOP: NA for rx dma */
+ 0, /* read fixed: NA for rx dma */
+ 0); /* SOP: NA for rx DMA */
+
+ /* clear control and status */
+ iowrite32(0, &csr->control);
+
+ /* If status bits are available, clear them */
+ if (sts & 0xf)
+ iowrite32(0xf, &csr->status);
+
+ dma_sync_single_for_device(priv->device,
+ priv->rxdescphys,
+ priv->rxdescmem,
+ DMA_BIDIRECTIONAL);
+
+ iowrite32(lower_32_bits(sgdma_rxphysaddr(priv, cdesc)),
+ &csr->next_descrip);
+
+ iowrite32((priv->rxctrlreg | SGDMA_CTRLREG_START),
+ &csr->control);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sgdma_async_write(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr;
+
+ if (sgdma_txbusy(priv))
+ return 0;
+
+ /* clear control and status */
+ iowrite32(0, &csr->control);
+ iowrite32(0x1f, &csr->status);
+
+ dma_sync_single_for_device(priv->device, priv->txdescphys,
+ priv->txdescmem, DMA_TO_DEVICE);
+
+ iowrite32(lower_32_bits(sgdma_txphysaddr(priv, desc)),
+ &csr->next_descrip);
+
+ iowrite32((priv->txctrlreg | SGDMA_CTRLREG_START),
+ &csr->control);
+
+ return 1;
+}
+
+static dma_addr_t
+sgdma_txphysaddr(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc)
+{
+ dma_addr_t paddr = priv->txdescmem_busaddr;
+ dma_addr_t offs = (dma_addr_t)((dma_addr_t)desc -
+ (dma_addr_t)priv->tx_dma_desc);
+ return paddr + offs;
+}
+
+static dma_addr_t
+sgdma_rxphysaddr(struct altera_tse_private *priv,
+ struct sgdma_descrip *desc)
+{
+ dma_addr_t paddr = priv->rxdescmem_busaddr;
+ dma_addr_t offs = (dma_addr_t)((dma_addr_t)desc -
+ (dma_addr_t)priv->rx_dma_desc);
+ return paddr + offs;
+}
+
+#define list_remove_head(list, entry, type, member) \
+ do { \
+ entry = NULL; \
+ if (!list_empty(list)) { \
+ entry = list_entry((list)->next, type, member); \
+ list_del_init(&entry->member); \
+ } \
+ } while (0)
+
+#define list_peek_head(list, entry, type, member) \
+ do { \
+ entry = NULL; \
+ if (!list_empty(list)) { \
+ entry = list_entry((list)->next, type, member); \
+ } \
+ } while (0)
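+
+/* Note: list_peek_head() behaves like list_first_entry_or_null(), and
+ * list_remove_head() additionally unlinks the entry; both set entry to
+ * NULL when the list is empty. Callers are expected to hold a lock, as
+ * noted on each queue helper below.
+ */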
+
+/* adds a tse_buffer to the tail of a tx buffer list.
+ * assumes the caller is managing and holding a mutual exclusion
+ * primitive to avoid simultaneous pushes/pops to the list.
+ */
+static void
+queue_tx(struct altera_tse_private *priv, struct tse_buffer *buffer)
+{
+ list_add_tail(&buffer->lh, &priv->txlisthd);
+}
+
+
+/* adds a tse_buffer to the tail of a rx buffer list
+ * assumes the caller is managing and holding a mutual exclusion
+ * primitive to avoid simultaneous pushes/pops to the list.
+ */
+static void
+queue_rx(struct altera_tse_private *priv, struct tse_buffer *buffer)
+{
+ list_add_tail(&buffer->lh, &priv->rxlisthd);
+}
+
+/* dequeues a tse_buffer from the transmit buffer list, otherwise
+ * returns NULL if empty.
+ * assumes the caller is managing and holding a mutual exclusion
+ * primitive to avoid simultaneous pushes/pops to the list.
+ */
+static struct tse_buffer *
+dequeue_tx(struct altera_tse_private *priv)
+{
+ struct tse_buffer *buffer = NULL;
+ list_remove_head(&priv->txlisthd, buffer, struct tse_buffer, lh);
+ return buffer;
+}
+
+/* dequeues a tse_buffer from the receive buffer list, otherwise
+ * returns NULL if empty
+ * assumes the caller is managing and holding a mutual exclusion
+ * primitive to avoid simultaneous pushes/pops to the list.
+ */
+static struct tse_buffer *
+dequeue_rx(struct altera_tse_private *priv)
+{
+ struct tse_buffer *buffer = NULL;
+ list_remove_head(&priv->rxlisthd, buffer, struct tse_buffer, lh);
+ return buffer;
+}
+
+/* dequeues a tse_buffer from the receive buffer list, otherwise
+ * returns NULL if empty
+ * assumes the caller is managing and holding a mutual exclusion
+ * primitive to avoid simultaneous pushes/pops to the list while the
+ * head is being examined.
+ */
+static struct tse_buffer *
+queue_rx_peekhead(struct altera_tse_private *priv)
+{
+ struct tse_buffer *buffer = NULL;
+ list_peek_head(&priv->rxlisthd, buffer, struct tse_buffer, lh);
+ return buffer;
+}
+
+/* check and return rx sgdma status without polling
+ */
+static int sgdma_rxbusy(struct altera_tse_private *priv)
+{
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr;
+ return ioread32(&csr->status) & SGDMA_STSREG_BUSY;
+}
+
+/* waits for the tx sgdma to finish its current operation, returns 0
+ * when it transitions to nonbusy, returns 1 if the operation times out
+ */
+static int sgdma_txbusy(struct altera_tse_private *priv)
+{
+ int delay = 0;
+ struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr;
+
+ /* if DMA is busy, wait for the current transaction to finish */
+ while ((ioread32(&csr->status) & SGDMA_STSREG_BUSY) && (delay++ < 100))
+ udelay(1);
+
+ if (ioread32(&csr->status) & SGDMA_STSREG_BUSY) {
+ netdev_err(priv->dev, "timeout waiting for tx dma\n");
+ return 1;
+ }
+ return 0;
+}
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_SGDMA_H__
+#define __ALTERA_SGDMA_H__
+
+void sgdma_reset(struct altera_tse_private *);
+void sgdma_enable_txirq(struct altera_tse_private *);
+void sgdma_enable_rxirq(struct altera_tse_private *);
+void sgdma_disable_rxirq(struct altera_tse_private *);
+void sgdma_disable_txirq(struct altera_tse_private *);
+void sgdma_clear_rxirq(struct altera_tse_private *);
+void sgdma_clear_txirq(struct altera_tse_private *);
+int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *);
+u32 sgdma_tx_completions(struct altera_tse_private *);
+int sgdma_add_rx_desc(struct altera_tse_private *priv, struct tse_buffer *);
+void sgdma_status(struct altera_tse_private *);
+u32 sgdma_rx_status(struct altera_tse_private *);
+int sgdma_initialize(struct altera_tse_private *);
+void sgdma_uninitialize(struct altera_tse_private *);
+
+#endif /* __ALTERA_SGDMA_H__ */
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_SGDMAHW_H__
+#define __ALTERA_SGDMAHW_H__
+
+/* SGDMA descriptor structure */
+struct sgdma_descrip {
+ unsigned int raddr; /* address of data to be read */
+ unsigned int pad1;
+ unsigned int waddr;
+ unsigned int pad2;
+ unsigned int next;
+ unsigned int pad3;
+ unsigned short bytes;
+ unsigned char rburst;
+ unsigned char wburst;
+ unsigned short bytes_xferred; /* 16 bits, bytes xferred */
+
+ /* bit 0: error
+ * bit 1: length error
+ * bit 2: crc error
+ * bit 3: truncated error
+ * bit 4: phy error
+ * bit 5: collision error
+ * bit 6: reserved
+ * bit 7: status eop for recv case
+ */
+ unsigned char status;
+
+ /* bit 0: eop
+ * bit 1: read_fixed
+ * bit 2: write fixed
+ * bits 3,4,5,6: Channel (always 0)
+ * bit 7: hardware owned
+ */
+ unsigned char control;
+} __packed;
+
+
+#define SGDMA_STATUS_ERR BIT(0)
+#define SGDMA_STATUS_LENGTH_ERR BIT(1)
+#define SGDMA_STATUS_CRC_ERR BIT(2)
+#define SGDMA_STATUS_TRUNC_ERR BIT(3)
+#define SGDMA_STATUS_PHY_ERR BIT(4)
+#define SGDMA_STATUS_COLL_ERR BIT(5)
+#define SGDMA_STATUS_EOP BIT(7)
+
+#define SGDMA_CONTROL_EOP BIT(0)
+#define SGDMA_CONTROL_RD_FIXED BIT(1)
+#define SGDMA_CONTROL_WR_FIXED BIT(2)
+
+/* Channel is always 0, so just zero initialize it */
+
+#define SGDMA_CONTROL_HW_OWNED BIT(7)
+
+/* SGDMA register space */
+struct sgdma_csr {
+ /* bit 0: error
+ * bit 1: eop
+ * bit 2: descriptor completed
+ * bit 3: chain completed
+ * bit 4: busy
+ * remainder reserved
+ */
+ u32 status;
+ u32 pad1[3];
+
+ /* bit 0: interrupt on error
+ * bit 1: interrupt on eop
+ * bit 2: interrupt after every descriptor
+ * bit 3: interrupt after last descrip in a chain
+ * bit 4: global interrupt enable
+ * bit 5: starts descriptor processing
+ * bit 6: stop core on dma error
+ * bit 7: interrupt on max descriptors
+ * bits 8-15: max descriptors to generate interrupt
+ * bit 16: Software reset
+ * bit 17: clears owned by hardware if 0, does not clear otherwise
+ * bit 18: enables descriptor polling mode
+ * bit 19-26: clocks before polling again
+ * bit 27-30: reserved
+ * bit 31: clear interrupt
+ */
+ u32 control;
+ u32 pad2[3];
+ u32 next_descrip;
+ u32 pad3[3];
+};
+
+
+#define SGDMA_STSREG_ERR BIT(0) /* Error */
+#define SGDMA_STSREG_EOP BIT(1) /* EOP */
+#define SGDMA_STSREG_DESCRIP BIT(2) /* Descriptor completed */
+#define SGDMA_STSREG_CHAIN BIT(3) /* Chain completed */
+#define SGDMA_STSREG_BUSY BIT(4) /* Controller busy */
+
+#define SGDMA_CTRLREG_IOE BIT(0) /* Interrupt on error */
+#define SGDMA_CTRLREG_IOEOP BIT(1) /* Interrupt on EOP */
+#define SGDMA_CTRLREG_IDESCRIP BIT(2) /* Interrupt after every descriptor */
+#define SGDMA_CTRLREG_ILASTD BIT(3) /* Interrupt after last descriptor */
+#define SGDMA_CTRLREG_INTEN BIT(4) /* Global Interrupt enable */
+#define SGDMA_CTRLREG_START BIT(5) /* starts descriptor processing */
+#define SGDMA_CTRLREG_STOPERR BIT(6) /* stop on dma error */
+#define SGDMA_CTRLREG_INTMAX BIT(7) /* Interrupt on max descriptors */
+#define SGDMA_CTRLREG_RESET BIT(16)/* Software reset */
+#define SGDMA_CTRLREG_COBHW BIT(17)/* Clears owned by hardware */
+#define SGDMA_CTRLREG_POLL BIT(18)/* enables descriptor polling mode */
+#define SGDMA_CTRLREG_CLRINT BIT(31)/* Clears interrupt */
+
+#endif /* __ALTERA_SGDMAHW_H__ */
--- /dev/null
+/* Altera Triple-Speed Ethernet MAC driver
+ * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
+ *
+ * Contributors:
+ * Dalon Westergreen
+ * Thomas Chou
+ * Ian Abbott
+ * Yuriy Kozlov
+ * Tobias Klauser
+ * Andriy Smolskyy
+ * Roman Bulgakov
+ * Dmytro Mytarchuk
+ * Matthew Gerlach
+ *
+ * Original driver contributed by SLS.
+ * Major updates contributed by GlobalLogic
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_TSE_H__
+#define __ALTERA_TSE_H__
+
+#define ALTERA_TSE_RESOURCE_NAME "altera_tse"
+
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+
+#define ALTERA_TSE_SW_RESET_WATCHDOG_CNTR 10000
+#define ALTERA_TSE_MAC_FIFO_WIDTH 4 /* TX/RX FIFO width in
+ * bytes
+ */
+/* Rx FIFO default settings */
+#define ALTERA_TSE_RX_SECTION_EMPTY 16
+#define ALTERA_TSE_RX_SECTION_FULL 0
+#define ALTERA_TSE_RX_ALMOST_EMPTY 8
+#define ALTERA_TSE_RX_ALMOST_FULL 8
+
+/* Tx FIFO default settings */
+#define ALTERA_TSE_TX_SECTION_EMPTY 16
+#define ALTERA_TSE_TX_SECTION_FULL 0
+#define ALTERA_TSE_TX_ALMOST_EMPTY 8
+#define ALTERA_TSE_TX_ALMOST_FULL 3
+
+/* MAC function configuration default settings */
+#define ALTERA_TSE_TX_IPG_LENGTH 12
+
+#define GET_BIT_VALUE(v, bit) (((v) >> (bit)) & 0x1)
+
+/* MAC Command_Config Register Bit Definitions
+ */
+#define MAC_CMDCFG_TX_ENA BIT(0)
+#define MAC_CMDCFG_RX_ENA BIT(1)
+#define MAC_CMDCFG_XON_GEN BIT(2)
+#define MAC_CMDCFG_ETH_SPEED BIT(3)
+#define MAC_CMDCFG_PROMIS_EN BIT(4)
+#define MAC_CMDCFG_PAD_EN BIT(5)
+#define MAC_CMDCFG_CRC_FWD BIT(6)
+#define MAC_CMDCFG_PAUSE_FWD BIT(7)
+#define MAC_CMDCFG_PAUSE_IGNORE BIT(8)
+#define MAC_CMDCFG_TX_ADDR_INS BIT(9)
+#define MAC_CMDCFG_HD_ENA BIT(10)
+#define MAC_CMDCFG_EXCESS_COL BIT(11)
+#define MAC_CMDCFG_LATE_COL BIT(12)
+#define MAC_CMDCFG_SW_RESET BIT(13)
+#define MAC_CMDCFG_MHASH_SEL BIT(14)
+#define MAC_CMDCFG_LOOP_ENA BIT(15)
+#define MAC_CMDCFG_TX_ADDR_SEL(v) (((v) & 0x7) << 16)
+#define MAC_CMDCFG_MAGIC_ENA BIT(19)
+#define MAC_CMDCFG_SLEEP BIT(20)
+#define MAC_CMDCFG_WAKEUP BIT(21)
+#define MAC_CMDCFG_XOFF_GEN BIT(22)
+#define MAC_CMDCFG_CNTL_FRM_ENA BIT(23)
+#define MAC_CMDCFG_NO_LGTH_CHECK BIT(24)
+#define MAC_CMDCFG_ENA_10 BIT(25)
+#define MAC_CMDCFG_RX_ERR_DISC BIT(26)
+#define MAC_CMDCFG_DISABLE_READ_TIMEOUT BIT(27)
+#define MAC_CMDCFG_CNT_RESET BIT(31)
+
+#define MAC_CMDCFG_TX_ENA_GET(v) GET_BIT_VALUE(v, 0)
+#define MAC_CMDCFG_RX_ENA_GET(v) GET_BIT_VALUE(v, 1)
+#define MAC_CMDCFG_XON_GEN_GET(v) GET_BIT_VALUE(v, 2)
+#define MAC_CMDCFG_ETH_SPEED_GET(v) GET_BIT_VALUE(v, 3)
+#define MAC_CMDCFG_PROMIS_EN_GET(v) GET_BIT_VALUE(v, 4)
+#define MAC_CMDCFG_PAD_EN_GET(v) GET_BIT_VALUE(v, 5)
+#define MAC_CMDCFG_CRC_FWD_GET(v) GET_BIT_VALUE(v, 6)
+#define MAC_CMDCFG_PAUSE_FWD_GET(v) GET_BIT_VALUE(v, 7)
+#define MAC_CMDCFG_PAUSE_IGNORE_GET(v) GET_BIT_VALUE(v, 8)
+#define MAC_CMDCFG_TX_ADDR_INS_GET(v) GET_BIT_VALUE(v, 9)
+#define MAC_CMDCFG_HD_ENA_GET(v) GET_BIT_VALUE(v, 10)
+#define MAC_CMDCFG_EXCESS_COL_GET(v) GET_BIT_VALUE(v, 11)
+#define MAC_CMDCFG_LATE_COL_GET(v) GET_BIT_VALUE(v, 12)
+#define MAC_CMDCFG_SW_RESET_GET(v) GET_BIT_VALUE(v, 13)
+#define MAC_CMDCFG_MHASH_SEL_GET(v) GET_BIT_VALUE(v, 14)
+#define MAC_CMDCFG_LOOP_ENA_GET(v) GET_BIT_VALUE(v, 15)
+#define MAC_CMDCFG_TX_ADDR_SEL_GET(v) (((v) >> 16) & 0x7)
+#define MAC_CMDCFG_MAGIC_ENA_GET(v) GET_BIT_VALUE(v, 19)
+#define MAC_CMDCFG_SLEEP_GET(v) GET_BIT_VALUE(v, 20)
+#define MAC_CMDCFG_WAKEUP_GET(v) GET_BIT_VALUE(v, 21)
+#define MAC_CMDCFG_XOFF_GEN_GET(v) GET_BIT_VALUE(v, 22)
+#define MAC_CMDCFG_CNTL_FRM_ENA_GET(v) GET_BIT_VALUE(v, 23)
+#define MAC_CMDCFG_NO_LGTH_CHECK_GET(v) GET_BIT_VALUE(v, 24)
+#define MAC_CMDCFG_ENA_10_GET(v) GET_BIT_VALUE(v, 25)
+#define MAC_CMDCFG_RX_ERR_DISC_GET(v) GET_BIT_VALUE(v, 26)
+#define MAC_CMDCFG_DISABLE_READ_TIMEOUT_GET(v) GET_BIT_VALUE(v, 27)
+#define MAC_CMDCFG_CNT_RESET_GET(v) GET_BIT_VALUE(v, 31)
+
+/* MDIO registers within MAC register space
+ */
+struct altera_tse_mdio {
+ u32 control; /* PHY device operation control register */
+ u32 status; /* PHY device operation status register */
+ u32 phy_id1; /* Bits 31:16 of PHY identifier */
+ u32 phy_id2; /* Bits 15:0 of PHY identifier */
+ u32 auto_negotiation_advertisement; /* Auto-negotiation
+ * advertisement
+ * register
+ */
+ u32 remote_partner_base_page_ability;
+
+ u32 reg6;
+ u32 reg7;
+ u32 reg8;
+ u32 reg9;
+ u32 rega;
+ u32 regb;
+ u32 regc;
+ u32 regd;
+ u32 rege;
+ u32 regf;
+ u32 reg10;
+ u32 reg11;
+ u32 reg12;
+ u32 reg13;
+ u32 reg14;
+ u32 reg15;
+ u32 reg16;
+ u32 reg17;
+ u32 reg18;
+ u32 reg19;
+ u32 reg1a;
+ u32 reg1b;
+ u32 reg1c;
+ u32 reg1d;
+ u32 reg1e;
+ u32 reg1f;
+};
+
+/* MAC register space. Note that some of these registers may or may not be
+ * present depending upon options chosen by the user when the core was
+ * configured and built. Please consult the Altera Triple Speed Ethernet User
+ * Guide for details.
+ */
+struct altera_tse_mac {
+ /* Bits 15:0: MegaCore function revision (0x0800). Bit 31:16: Customer
+ * specific revision
+ */
+ u32 megacore_revision;
+ /* Provides a memory location for user applications to test the device
+ * memory operation.
+ */
+ u32 scratch_pad;
+ /* The host processor uses this register to control and configure the
+ * MAC block
+ */
+ u32 command_config;
+ /* 32-bit primary MAC address word 0 bits 0 to 31 of the primary
+ * MAC address
+ */
+ u32 mac_addr_0;
+ /* 32-bit primary MAC address word 1 bits 32 to 47 of the primary
+ * MAC address
+ */
+ u32 mac_addr_1;
+ /* 14-bit maximum frame length. The MAC receive logic flags frames
+ * longer than this value as oversized.
+ */
+ u32 frm_length;
+ /* The pause quanta is used in each pause frame sent to a remote
+ * Ethernet device, in increments of 512 Ethernet bit times
+ */
+ u32 pause_quanta;
+ /* 12-bit receive FIFO section-empty threshold */
+ u32 rx_section_empty;
+ /* 12-bit receive FIFO section-full threshold */
+ u32 rx_section_full;
+ /* 12-bit transmit FIFO section-empty threshold */
+ u32 tx_section_empty;
+ /* 12-bit transmit FIFO section-full threshold */
+ u32 tx_section_full;
+ /* 12-bit receive FIFO almost-empty threshold */
+ u32 rx_almost_empty;
+ /* 12-bit receive FIFO almost-full threshold */
+ u32 rx_almost_full;
+ /* 12-bit transmit FIFO almost-empty threshold */
+ u32 tx_almost_empty;
+ /* 12-bit transmit FIFO almost-full threshold */
+ u32 tx_almost_full;
+ /* MDIO address of PHY Device 0. Bits 0 to 4 hold a 5-bit PHY address */
+ u32 mdio_phy0_addr;
+ /* MDIO address of PHY Device 1. Bits 0 to 4 hold a 5-bit PHY address */
+ u32 mdio_phy1_addr;
+
+ /* Bits 15:0: 16-bit holdoff quanta */
+ u32 holdoff_quant;
+
+ /* only if 100/1000 BaseX PCS, reserved otherwise */
+ u32 reserved1[5];
+
+ /* Minimum IPG between consecutive transmit frame in terms of bytes */
+ u32 tx_ipg_length;
+
+ /* IEEE 802.3 oEntity Managed Object Support */
+
+ /* The MAC addresses */
+ u32 mac_id_1;
+ u32 mac_id_2;
+
+ /* Number of frames transmitted without error including pause frames */
+ u32 frames_transmitted_ok;
+ /* Number of frames received without error including pause frames */
+ u32 frames_received_ok;
+ /* Number of frames received with a CRC error */
+ u32 frames_check_sequence_errors;
+ /* Frame received with an alignment error */
+ u32 alignment_errors;
+ /* Sum of payload and padding octets of frames transmitted without
+ * error
+ */
+ u32 octets_transmitted_ok;
+ /* Sum of payload and padding octets of frames received without error */
+ u32 octets_received_ok;
+
+ /* IEEE 802.3 oPausedEntity Managed Object Support */
+
+ /* Number of transmitted pause frames */
+ u32 tx_pause_mac_ctrl_frames;
+ /* Number of Received pause frames */
+ u32 rx_pause_mac_ctrl_frames;
+
+ /* IETF MIB (MIB-II) Object Support */
+
+ /* Number of frames received with error */
+ u32 if_in_errors;
+ /* Number of frames transmitted with error */
+ u32 if_out_errors;
+ /* Number of valid received unicast frames */
+ u32 if_in_ucast_pkts;
+ /* Number of valid received multicasts frames (without pause) */
+ u32 if_in_multicast_pkts;
+ /* Number of valid received broadcast frames */
+ u32 if_in_broadcast_pkts;
+ u32 if_out_discards;
+ /* The number of valid unicast frames transmitted */
+ u32 if_out_ucast_pkts;
+ /* The number of valid multicast frames transmitted,
+ * excluding pause frames
+ */
+ u32 if_out_multicast_pkts;
+ u32 if_out_broadcast_pkts;
+
+ /* IETF RMON MIB Object Support */
+
+ /* Counts the number of dropped packets due to internal errors
+ * of the MAC client.
+ */
+ u32 ether_stats_drop_events;
+ /* Total number of bytes received. Good and bad frames. */
+ u32 ether_stats_octets;
+ /* Total number of packets received. Counts good and bad packets. */
+ u32 ether_stats_pkts;
+ /* Number of packets received with less than 64 bytes. */
+ u32 ether_stats_undersize_pkts;
+ /* The number of frames received that are longer than the
+ * value configured in the frm_length register
+ */
+ u32 ether_stats_oversize_pkts;
+ /* Number of received packets of exactly 64 bytes */
+ u32 ether_stats_pkts_64_octets;
+ /* Frames (good and bad) with 65 to 127 bytes */
+ u32 ether_stats_pkts_65to127_octets;
+ /* Frames (good and bad) with 128 to 255 bytes */
+ u32 ether_stats_pkts_128to255_octets;
+ /* Frames (good and bad) with 256 to 511 bytes */
+ u32 ether_stats_pkts_256to511_octets;
+ /* Frames (good and bad) with 512 to 1023 bytes */
+ u32 ether_stats_pkts_512to1023_octets;
+ /* Frames (good and bad) with 1024 to 1518 bytes */
+ u32 ether_stats_pkts_1024to1518_octets;
+
+ /* Any frame length from 1519 to the maximum length configured in the
+ * frm_length register, if it is greater than 1518
+ */
+ u32 ether_stats_pkts_1519tox_octets;
+ /* Too long frames with CRC error */
+ u32 ether_stats_jabbers;
+ /* Too short frames with CRC error */
+ u32 ether_stats_fragments;
+
+ u32 reserved2;
+
+ /* FIFO control register */
+ u32 tx_cmd_stat;
+ u32 rx_cmd_stat;
+
+ /* Extended Statistics Counters */
+ u32 msb_octets_transmitted_ok;
+ u32 msb_octets_received_ok;
+ u32 msb_ether_stats_octets;
+
+ u32 reserved3;
+
+ /* Multicast address resolution table, mapped in the controller address
+ * space
+ */
+ u32 hash_table[64];
+
+ /* Registers 0 to 31 within PHY device 0/1 connected to the MDIO PHY
+ * management interface
+ */
+ struct altera_tse_mdio mdio_phy0;
+ struct altera_tse_mdio mdio_phy1;
+
+ /* 4 Supplemental MAC Addresses */
+ u32 supp_mac_addr_0_0;
+ u32 supp_mac_addr_0_1;
+ u32 supp_mac_addr_1_0;
+ u32 supp_mac_addr_1_1;
+ u32 supp_mac_addr_2_0;
+ u32 supp_mac_addr_2_1;
+ u32 supp_mac_addr_3_0;
+ u32 supp_mac_addr_3_1;
+
+ u32 reserved4[8];
+
+ /* IEEE 1588v2 Feature */
+ u32 tx_period;
+ u32 tx_adjust_fns;
+ u32 tx_adjust_ns;
+ u32 rx_period;
+ u32 rx_adjust_fns;
+ u32 rx_adjust_ns;
+
+ u32 reserved5[42];
+};
+
+/* Transmit and Receive Command Registers Bit Definitions
+ */
+#define ALTERA_TSE_TX_CMD_STAT_OMIT_CRC BIT(17)
+#define ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 BIT(18)
+#define ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16 BIT(25)
+
+/* Wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct tse_buffer {
+ struct list_head lh;
+ struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ u32 len;
+ int mapped_as_page;
+};
+
+struct altera_tse_private;
+
+#define ALTERA_DTYPE_SGDMA 1
+#define ALTERA_DTYPE_MSGDMA 2
+
+/* standard DMA interface for SGDMA and MSGDMA */
+struct altera_dmaops {
+ int altera_dtype;
+ int dmamask;
+ void (*reset_dma)(struct altera_tse_private *);
+ void (*enable_txirq)(struct altera_tse_private *);
+ void (*enable_rxirq)(struct altera_tse_private *);
+ void (*disable_txirq)(struct altera_tse_private *);
+ void (*disable_rxirq)(struct altera_tse_private *);
+ void (*clear_txirq)(struct altera_tse_private *);
+ void (*clear_rxirq)(struct altera_tse_private *);
+ int (*tx_buffer)(struct altera_tse_private *, struct tse_buffer *);
+ u32 (*tx_completions)(struct altera_tse_private *);
+ int (*add_rx_desc)(struct altera_tse_private *, struct tse_buffer *);
+ u32 (*get_rx_status)(struct altera_tse_private *);
+ int (*init_dma)(struct altera_tse_private *);
+ void (*uninit_dma)(struct altera_tse_private *);
+};
+
+/* This structure is private to each device.
+ */
+struct altera_tse_private {
+ struct net_device *dev;
+ struct device *device;
+ struct napi_struct napi;
+
+ /* MAC address space */
+ struct altera_tse_mac __iomem *mac_dev;
+
+ /* TSE Revision */
+ u32 revision;
+
+ /* mSGDMA Rx Dispatcher address space */
+ void __iomem *rx_dma_csr;
+ void __iomem *rx_dma_desc;
+ void __iomem *rx_dma_resp;
+
+ /* mSGDMA Tx Dispatcher address space */
+ void __iomem *tx_dma_csr;
+ void __iomem *tx_dma_desc;
+
+ /* Rx buffers queue */
+ struct tse_buffer *rx_ring;
+ u32 rx_cons;
+ u32 rx_prod;
+ u32 rx_ring_size;
+ u32 rx_dma_buf_sz;
+
+ /* Tx ring buffer */
+ struct tse_buffer *tx_ring;
+ u32 tx_prod;
+ u32 tx_cons;
+ u32 tx_ring_size;
+
+ /* Interrupts */
+ u32 tx_irq;
+ u32 rx_irq;
+
+ /* RX/TX MAC FIFO configs */
+ u32 tx_fifo_depth;
+ u32 rx_fifo_depth;
+ u32 max_mtu;
+
+ /* Hash filter settings */
+ u32 hash_filter;
+ u32 added_unicast;
+
+ /* Descriptor memory info for managing SGDMA */
+ u32 txdescmem;
+ u32 rxdescmem;
+ dma_addr_t rxdescmem_busaddr;
+ dma_addr_t txdescmem_busaddr;
+ u32 txctrlreg;
+ u32 rxctrlreg;
+ dma_addr_t rxdescphys;
+ dma_addr_t txdescphys;
+
+ struct list_head txlisthd;
+ struct list_head rxlisthd;
+
+ /* MAC command_config register protection */
+ spinlock_t mac_cfg_lock;
+ /* Tx path protection */
+ spinlock_t tx_lock;
+ /* Rx DMA & interrupt control protection */
+ spinlock_t rxdma_irq_lock;
+
+ /* PHY */
+ int phy_addr; /* PHY's MDIO address, -1 for autodetection */
+ phy_interface_t phy_iface;
+ struct mii_bus *mdio;
+ struct phy_device *phydev;
+ int oldspeed;
+ int oldduplex;
+ int oldlink;
+
+ /* ethtool msglvl option */
+ u32 msg_enable;
+
+ struct altera_dmaops *dmaops;
+};
+
+/* Function prototypes
+ */
+void altera_tse_set_ethtool_ops(struct net_device *);
+
+#endif /* __ALTERA_TSE_H__ */
--- /dev/null
+/* Ethtool support for Altera Triple-Speed Ethernet MAC driver
+ * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
+ *
+ * Contributors:
+ * Dalon Westergreen
+ * Thomas Chou
+ * Ian Abbott
+ * Yuriy Kozlov
+ * Tobias Klauser
+ * Andriy Smolskyy
+ * Roman Bulgakov
+ * Dmytro Mytarchuk
+ *
+ * Original driver contributed by SLS.
+ * Major updates contributed by GlobalLogic
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+
+#include "altera_tse.h"
+
+#define TSE_STATS_LEN 31
+#define TSE_NUM_REGS 128
+
+static char const stat_gstrings[][ETH_GSTRING_LEN] = {
+ "tx_packets",
+ "rx_packets",
+ "rx_crc_errors",
+ "rx_align_errors",
+ "tx_bytes",
+ "rx_bytes",
+ "tx_pause",
+ "rx_pause",
+ "rx_errors",
+ "tx_errors",
+ "rx_unicast",
+ "rx_multicast",
+ "rx_broadcast",
+ "tx_discards",
+ "tx_unicast",
+ "tx_multicast",
+ "tx_broadcast",
+ "ether_drops",
+ "rx_total_bytes",
+ "rx_total_packets",
+ "rx_undersize",
+ "rx_oversize",
+ "rx_64_bytes",
+ "rx_65_127_bytes",
+ "rx_128_255_bytes",
+ "rx_256_511_bytes",
+ "rx_512_1023_bytes",
+ "rx_1024_1518_bytes",
+ "rx_gte_1519_bytes",
+ "rx_jabbers",
+ "rx_runts",
+};
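+
+/* The string order above must match the buf[] indices written by
+ * tse_fill_stats() below, and TSE_STATS_LEN must equal the number of
+ * entries in both.
+ */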
+
+static void tse_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ u32 rev = ioread32(&priv->mac_dev->megacore_revision);
+
+ strcpy(info->driver, "Altera TSE MAC IP Driver");
+ strcpy(info->version, "v8.0");
+ snprintf(info->fw_version, ETHTOOL_FWVERS_LEN, "v%d.%d",
+ rev & 0xFFFF, (rev & 0xFFFF0000) >> 16);
+ sprintf(info->bus_info, "platform");
+}
+
+/* Fill in a buffer with the strings which correspond to the
+ * stats
+ */
+static void tse_gstrings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+ memcpy(buf, stat_gstrings, TSE_STATS_LEN * ETH_GSTRING_LEN);
+}
+
+static void tse_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
+ u64 *buf)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct altera_tse_mac *mac = priv->mac_dev;
+ u64 ext;
+
+ buf[0] = ioread32(&mac->frames_transmitted_ok);
+ buf[1] = ioread32(&mac->frames_received_ok);
+ buf[2] = ioread32(&mac->frames_check_sequence_errors);
+ buf[3] = ioread32(&mac->alignment_errors);
+
+ /* Extended aOctetsTransmittedOK counter */
+ ext = (u64) ioread32(&mac->msb_octets_transmitted_ok) << 32;
+ ext |= ioread32(&mac->octets_transmitted_ok);
+ buf[4] = ext;
+
+ /* Extended aOctetsReceivedOK counter */
+ ext = (u64) ioread32(&mac->msb_octets_received_ok) << 32;
+ ext |= ioread32(&mac->octets_received_ok);
+ buf[5] = ext;
+
+ buf[6] = ioread32(&mac->tx_pause_mac_ctrl_frames);
+ buf[7] = ioread32(&mac->rx_pause_mac_ctrl_frames);
+ buf[8] = ioread32(&mac->if_in_errors);
+ buf[9] = ioread32(&mac->if_out_errors);
+ buf[10] = ioread32(&mac->if_in_ucast_pkts);
+ buf[11] = ioread32(&mac->if_in_multicast_pkts);
+ buf[12] = ioread32(&mac->if_in_broadcast_pkts);
+ buf[13] = ioread32(&mac->if_out_discards);
+ buf[14] = ioread32(&mac->if_out_ucast_pkts);
+ buf[15] = ioread32(&mac->if_out_multicast_pkts);
+ buf[16] = ioread32(&mac->if_out_broadcast_pkts);
+ buf[17] = ioread32(&mac->ether_stats_drop_events);
+
+ /* Extended etherStatsOctets counter */
+ ext = (u64) ioread32(&mac->msb_ether_stats_octets) << 32;
+ ext |= ioread32(&mac->ether_stats_octets);
+ buf[18] = ext;
+
+ buf[19] = ioread32(&mac->ether_stats_pkts);
+ buf[20] = ioread32(&mac->ether_stats_undersize_pkts);
+ buf[21] = ioread32(&mac->ether_stats_oversize_pkts);
+ buf[22] = ioread32(&mac->ether_stats_pkts_64_octets);
+ buf[23] = ioread32(&mac->ether_stats_pkts_65to127_octets);
+ buf[24] = ioread32(&mac->ether_stats_pkts_128to255_octets);
+ buf[25] = ioread32(&mac->ether_stats_pkts_256to511_octets);
+ buf[26] = ioread32(&mac->ether_stats_pkts_512to1023_octets);
+ buf[27] = ioread32(&mac->ether_stats_pkts_1024to1518_octets);
+ buf[28] = ioread32(&mac->ether_stats_pkts_1519tox_octets);
+ buf[29] = ioread32(&mac->ether_stats_jabbers);
+ buf[30] = ioread32(&mac->ether_stats_fragments);
+}
+
+static int tse_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return TSE_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static u32 tse_get_msglevel(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+
+ return priv->msg_enable;
+}
+
+static void tse_set_msglevel(struct net_device *dev, u32 data)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+
+ priv->msg_enable = data;
+}
+
+static int tse_reglen(struct net_device *dev)
+{
+ return TSE_NUM_REGS * sizeof(u32);
+}
+
+static void tse_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+ void *regbuf)
+{
+ int i;
+ struct altera_tse_private *priv = netdev_priv(dev);
+ u32 *tse_mac_regs = (u32 *)priv->mac_dev;
+ u32 *buf = regbuf;
+
+ for (i = 0; i < TSE_NUM_REGS; i++)
+ buf[i] = ioread32(&tse_mac_regs[i]);
+}
+
+static int tse_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct phy_device *phydev = priv->phydev;
+
+ if (phydev == NULL)
+ return -ENODEV;
+
+ return phy_ethtool_gset(phydev, cmd);
+}
+
+static int tse_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct phy_device *phydev = priv->phydev;
+
+ if (phydev == NULL)
+ return -ENODEV;
+
+ return phy_ethtool_sset(phydev, cmd);
+}
+
+static const struct ethtool_ops tse_ethtool_ops = {
+ .get_drvinfo = tse_get_drvinfo,
+ .get_regs_len = tse_reglen,
+ .get_regs = tse_get_regs,
+ .get_link = ethtool_op_get_link,
+ .get_settings = tse_get_settings,
+ .set_settings = tse_set_settings,
+ .get_strings = tse_gstrings,
+ .get_sset_count = tse_sset_count,
+ .get_ethtool_stats = tse_fill_stats,
+ .get_msglevel = tse_get_msglevel,
+ .set_msglevel = tse_set_msglevel,
+};
+
+void altera_tse_set_ethtool_ops(struct net_device *netdev)
+{
+ SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops);
+}
--- /dev/null
+/* Altera Triple-Speed Ethernet MAC driver
+ * Copyright (C) 2008-2014 Altera Corporation. All rights reserved
+ *
+ * Contributors:
+ * Dalon Westergreen
+ * Thomas Chou
+ * Ian Abbott
+ * Yuriy Kozlov
+ * Tobias Klauser
+ * Andriy Smolskyy
+ * Roman Bulgakov
+ * Dmytro Mytarchuk
+ * Matthew Gerlach
+ *
+ * Original driver contributed by SLS.
+ * Major updates contributed by GlobalLogic
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <asm/cacheflush.h>
+
+#include "altera_utils.h"
+#include "altera_tse.h"
+#include "altera_sgdma.h"
+#include "altera_msgdma.h"
+
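+/* Initialized to ~0 so that the first atomic_add_return() in probe
+ * yields instance id 0, used to name the per-device MDIO bus.
+ */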
+static atomic_t instance_count = ATOMIC_INIT(~0);
+/* Module parameters */
+static int debug = -1;
+module_param(debug, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
+
+static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
+ NETIF_MSG_LINK | NETIF_MSG_IFUP |
+ NETIF_MSG_IFDOWN);
+
+#define RX_DESCRIPTORS 64
+static int dma_rx_num = RX_DESCRIPTORS;
+module_param(dma_rx_num, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dma_rx_num, "Number of descriptors in the RX list");
+
+#define TX_DESCRIPTORS 64
+static int dma_tx_num = TX_DESCRIPTORS;
+module_param(dma_tx_num, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list");
+
+#define POLL_PHY (-1)
+
+/* Make sure DMA buffer size is larger than the max frame size
+ * plus some alignment offset and a VLAN header. If the max frame size is
+ * 1518, a VLAN header would be additional 4 bytes and additional
+ * headroom for alignment is 2 bytes, 2048 is just fine.
+ */
+#define ALTERA_RXDMABUFFER_SIZE 2048
+
+/* Allow network stack to resume queueing packets after we've
+ * finished transmitting at least 1/4 of the packets in the queue.
+ */
+#define TSE_TX_THRESH(x) (x->tx_ring_size / 4)
+
+#define TXQUEUESTOP_THRESHOLD 2
+
+static struct of_device_id altera_tse_ids[];
+
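+/* tx_prod and tx_cons are free-running counters, so their difference
+ * is the number of in-flight Tx buffers. One ring slot is kept unused
+ * so that a completely full ring can be distinguished from an empty
+ * one.
+ */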
+static inline u32 tse_tx_avail(struct altera_tse_private *priv)
+{
+ return priv->tx_cons + priv->tx_ring_size - priv->tx_prod - 1;
+}
+
+/* MDIO specific functions
+ */
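+/* The 32 PHY registers appear memory-mapped as consecutive u32s at
+ * mdio_phy0/mdio_phy1 within the MAC register space; the core turns
+ * an indexed MMIO access into the corresponding MDIO bus transaction.
+ */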
+static int altera_tse_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+ struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv;
+ unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0;
+ u32 data;
+
+ /* set MDIO address */
+ iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr);
+
+ /* get the data */
+ data = ioread32(&mdio_regs[regnum]) & 0xffff;
+ return data;
+}
+
+static int altera_tse_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
+ u16 value)
+{
+ struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv;
+ unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0;
+
+ /* set MDIO address */
+ iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr);
+
+ /* write the data */
+ iowrite32((u32) value, &mdio_regs[regnum]);
+ return 0;
+}
+
+static int altera_tse_mdio_create(struct net_device *dev, unsigned int id)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ int ret;
+ int i;
+ struct device_node *mdio_node = NULL;
+ struct mii_bus *mdio = NULL;
+ struct device_node *child_node = NULL;
+
+ for_each_child_of_node(priv->device->of_node, child_node) {
+ if (of_device_is_compatible(child_node, "altr,tse-mdio")) {
+ mdio_node = child_node;
+ break;
+ }
+ }
+
+ if (mdio_node) {
+ netdev_dbg(dev, "FOUND MDIO subnode\n");
+ } else {
+ netdev_dbg(dev, "NO MDIO subnode\n");
+ return 0;
+ }
+
+ mdio = mdiobus_alloc();
+ if (mdio == NULL) {
+ netdev_err(dev, "Error allocating MDIO bus\n");
+ return -ENOMEM;
+ }
+
+ mdio->name = ALTERA_TSE_RESOURCE_NAME;
+ mdio->read = &altera_tse_mdio_read;
+ mdio->write = &altera_tse_mdio_write;
+ snprintf(mdio->id, MII_BUS_ID_SIZE, "%s-%u", mdio->name, id);
+
+ mdio->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
+ if (mdio->irq == NULL) {
+ ret = -ENOMEM;
+ goto out_free_mdio;
+ }
+ for (i = 0; i < PHY_MAX_ADDR; i++)
+ mdio->irq[i] = PHY_POLL;
+
+ mdio->priv = priv->mac_dev;
+ mdio->parent = priv->device;
+
+ ret = of_mdiobus_register(mdio, mdio_node);
+ if (ret != 0) {
+ netdev_err(dev, "Cannot register MDIO bus %s\n",
+ mdio->id);
+ goto out_free_mdio_irq;
+ }
+
+ if (netif_msg_drv(priv))
+ netdev_info(dev, "MDIO bus %s: created\n", mdio->id);
+
+ priv->mdio = mdio;
+ return 0;
+out_free_mdio_irq:
+ kfree(mdio->irq);
+out_free_mdio:
+ mdiobus_free(mdio);
+ mdio = NULL;
+ return ret;
+}
+
+static void altera_tse_mdio_destroy(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+
+ if (priv->mdio == NULL)
+ return;
+
+ if (netif_msg_drv(priv))
+ netdev_info(dev, "MDIO bus %s: removed\n",
+ priv->mdio->id);
+
+ mdiobus_unregister(priv->mdio);
+ kfree(priv->mdio->irq);
+ mdiobus_free(priv->mdio);
+ priv->mdio = NULL;
+}
+
+static int tse_init_rx_buffer(struct altera_tse_private *priv,
+ struct tse_buffer *rxbuffer, int len)
+{
+ rxbuffer->skb = netdev_alloc_skb_ip_align(priv->dev, len);
+ if (!rxbuffer->skb)
+ return -ENOMEM;
+
+ rxbuffer->dma_addr = dma_map_single(priv->device, rxbuffer->skb->data,
+ len,
+ DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(priv->device, rxbuffer->dma_addr)) {
+ netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
+ dev_kfree_skb_any(rxbuffer->skb);
+ return -EINVAL;
+ }
+ rxbuffer->len = len;
+ return 0;
+}
+
+static void tse_free_rx_buffer(struct altera_tse_private *priv,
+ struct tse_buffer *rxbuffer)
+{
+ struct sk_buff *skb = rxbuffer->skb;
+ dma_addr_t dma_addr = rxbuffer->dma_addr;
+
+ if (skb != NULL) {
+ if (dma_addr)
+ dma_unmap_single(priv->device, dma_addr,
+ rxbuffer->len,
+ DMA_FROM_DEVICE);
+ dev_kfree_skb_any(skb);
+ rxbuffer->skb = NULL;
+ rxbuffer->dma_addr = 0;
+ }
+}
+
+/* Unmap and free Tx buffer resources
+ */
+static void tse_free_tx_buffer(struct altera_tse_private *priv,
+ struct tse_buffer *buffer)
+{
+ if (buffer->dma_addr) {
+ if (buffer->mapped_as_page)
+ dma_unmap_page(priv->device, buffer->dma_addr,
+ buffer->len, DMA_TO_DEVICE);
+ else
+ dma_unmap_single(priv->device, buffer->dma_addr,
+ buffer->len, DMA_TO_DEVICE);
+ buffer->dma_addr = 0;
+ }
+ if (buffer->skb) {
+ dev_kfree_skb_any(buffer->skb);
+ buffer->skb = NULL;
+ }
+}
+
+static int alloc_init_skbufs(struct altera_tse_private *priv)
+{
+ unsigned int rx_descs = priv->rx_ring_size;
+ unsigned int tx_descs = priv->tx_ring_size;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Create Rx ring buffer */
+ priv->rx_ring = kcalloc(rx_descs, sizeof(struct tse_buffer),
+ GFP_KERNEL);
+ if (!priv->rx_ring)
+ goto err_rx_ring;
+
+ /* Create Tx ring buffer */
+ priv->tx_ring = kcalloc(tx_descs, sizeof(struct tse_buffer),
+ GFP_KERNEL);
+ if (!priv->tx_ring)
+ goto err_tx_ring;
+
+ priv->tx_cons = 0;
+ priv->tx_prod = 0;
+
+ /* Init Rx ring */
+ for (i = 0; i < rx_descs; i++) {
+ ret = tse_init_rx_buffer(priv, &priv->rx_ring[i],
+ priv->rx_dma_buf_sz);
+ if (ret)
+ goto err_init_rx_buffers;
+ }
+
+ priv->rx_cons = 0;
+ priv->rx_prod = 0;
+
+ return 0;
+err_init_rx_buffers:
+ while (--i >= 0)
+ tse_free_rx_buffer(priv, &priv->rx_ring[i]);
+ kfree(priv->tx_ring);
+err_tx_ring:
+ kfree(priv->rx_ring);
+err_rx_ring:
+ return ret;
+}
+
+static void free_skbufs(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ unsigned int rx_descs = priv->rx_ring_size;
+ unsigned int tx_descs = priv->tx_ring_size;
+ int i;
+
+ /* Release the DMA TX/RX socket buffers */
+ for (i = 0; i < rx_descs; i++)
+ tse_free_rx_buffer(priv, &priv->rx_ring[i]);
+ for (i = 0; i < tx_descs; i++)
+ tse_free_tx_buffer(priv, &priv->tx_ring[i]);
+
+ kfree(priv->tx_ring);
+ kfree(priv->rx_ring);
+}
+
+/* Reallocate skbs for Rx ring entries consumed by the reception
+ * process
+ */
+static inline void tse_rx_refill(struct altera_tse_private *priv)
+{
+ unsigned int rxsize = priv->rx_ring_size;
+ unsigned int entry;
+ int ret;
+
+ for (; priv->rx_cons - priv->rx_prod > 0;
+ priv->rx_prod++) {
+ entry = priv->rx_prod % rxsize;
+ if (likely(priv->rx_ring[entry].skb == NULL)) {
+ ret = tse_init_rx_buffer(priv, &priv->rx_ring[entry],
+ priv->rx_dma_buf_sz);
+ if (unlikely(ret != 0))
+ break;
+ priv->dmaops->add_rx_desc(priv, &priv->rx_ring[entry]);
+ }
+ }
+}
+
+/* Pull out the VLAN tag and fix up the packet
+ */
+static inline void tse_rx_vlan(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ethhdr *eth_hdr;
+ u16 vid;
+
+ if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ !__vlan_get_tag(skb, &vid)) {
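+ /* Shift the 12 bytes of destination and source MAC address up
+ * over the 4-byte VLAN tag, pull the tag out of the linear
+ * data, and hand it to the stack via the hwaccel tag field.
+ */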
+ eth_hdr = (struct ethhdr *)skb->data;
+ memmove(skb->data + VLAN_HLEN, eth_hdr, ETH_ALEN * 2);
+ skb_pull(skb, VLAN_HLEN);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+ }
+}
+
+/* Receive a packet: retrieve and pass over to upper levels
+ */
+static int tse_rx(struct altera_tse_private *priv, int limit)
+{
+ unsigned int count = 0;
+ unsigned int next_entry;
+ struct sk_buff *skb;
+ unsigned int entry = priv->rx_cons % priv->rx_ring_size;
+ u32 rxstatus;
+ u16 pktlength;
+ u16 pktstatus;
+
+ while ((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) {
+ pktstatus = rxstatus >> 16;
+ pktlength = rxstatus & 0xffff;
+
+ if ((pktstatus & 0xFF) || (pktlength == 0))
+ netdev_err(priv->dev,
+ "RCV pktstatus %08X pktlength %08X\n",
+ pktstatus, pktlength);
+
+ count++;
+ next_entry = (++priv->rx_cons) % priv->rx_ring_size;
+
+ skb = priv->rx_ring[entry].skb;
+ if (unlikely(!skb)) {
+ netdev_err(priv->dev,
+ "%s: Inconsistent Rx descriptor chain\n",
+ __func__);
+ priv->dev->stats.rx_dropped++;
+ break;
+ }
+ priv->rx_ring[entry].skb = NULL;
+
+ skb_put(skb, pktlength);
+
+ /* make cache consistent with receive packet buffer */
+ dma_sync_single_for_cpu(priv->device,
+ priv->rx_ring[entry].dma_addr,
+ priv->rx_ring[entry].len,
+ DMA_FROM_DEVICE);
+
+ dma_unmap_single(priv->device, priv->rx_ring[entry].dma_addr,
+ priv->rx_ring[entry].len, DMA_FROM_DEVICE);
+
+ if (netif_msg_pktdata(priv)) {
+ netdev_info(priv->dev, "frame received %d bytes\n",
+ pktlength);
+ print_hex_dump(KERN_ERR, "data: ", DUMP_PREFIX_OFFSET,
+ 16, 1, skb->data, pktlength, true);
+ }
+
+ tse_rx_vlan(priv->dev, skb);
+
+ skb->protocol = eth_type_trans(skb, priv->dev);
+ skb_checksum_none_assert(skb);
+
+ napi_gro_receive(&priv->napi, skb);
+
+ priv->dev->stats.rx_packets++;
+ priv->dev->stats.rx_bytes += pktlength;
+
+ entry = next_entry;
+ }
+
+ tse_rx_refill(priv);
+ return count;
+}
+
+/* Reclaim resources after transmission completes
+ */
+static int tse_tx_complete(struct altera_tse_private *priv)
+{
+ unsigned int txsize = priv->tx_ring_size;
+ u32 ready;
+ unsigned int entry;
+ struct tse_buffer *tx_buff;
+ int txcomplete = 0;
+
+ spin_lock(&priv->tx_lock);
+
+ ready = priv->dmaops->tx_completions(priv);
+
+ /* Free sent buffers */
+ while (ready && (priv->tx_cons != priv->tx_prod)) {
+ entry = priv->tx_cons % txsize;
+ tx_buff = &priv->tx_ring[entry];
+
+ if (netif_msg_tx_done(priv))
+ netdev_dbg(priv->dev, "%s: curr %d, dirty %d\n",
+ __func__, priv->tx_prod, priv->tx_cons);
+
+ if (likely(tx_buff->skb))
+ priv->dev->stats.tx_packets++;
+
+ tse_free_tx_buffer(priv, tx_buff);
+ priv->tx_cons++;
+
+ txcomplete++;
+ ready--;
+ }
+
+ if (unlikely(netif_queue_stopped(priv->dev) &&
+ tse_tx_avail(priv) > TSE_TX_THRESH(priv))) {
+ netif_tx_lock(priv->dev);
+ if (netif_queue_stopped(priv->dev) &&
+ tse_tx_avail(priv) > TSE_TX_THRESH(priv)) {
+ if (netif_msg_tx_done(priv))
+ netdev_dbg(priv->dev, "%s: restart transmit\n",
+ __func__);
+ netif_wake_queue(priv->dev);
+ }
+ netif_tx_unlock(priv->dev);
+ }
+
+ spin_unlock(&priv->tx_lock);
+ return txcomplete;
+}
+
+/* NAPI polling function
+ */
+static int tse_poll(struct napi_struct *napi, int budget)
+{
+ struct altera_tse_private *priv =
+ container_of(napi, struct altera_tse_private, napi);
+ int rxcomplete = 0;
+ int txcomplete = 0;
+ unsigned long flags;
+
+ txcomplete = tse_tx_complete(priv);
+
+ rxcomplete = tse_rx(priv, budget);
+
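+ /* If the budget was exhausted or any Tx work was reclaimed, stay
+ * scheduled: returning without completing NAPI leaves the DMA
+ * interrupts masked and guarantees another poll pass.
+ */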
+ if (rxcomplete >= budget || txcomplete > 0)
+ return rxcomplete;
+
+ napi_gro_flush(napi, false);
+ __napi_complete(napi);
+
+ netdev_dbg(priv->dev,
+ "NAPI Complete, did %d packets with budget %d\n",
+ txcomplete+rxcomplete, budget);
+
+ spin_lock_irqsave(&priv->rxdma_irq_lock, flags);
+ priv->dmaops->enable_rxirq(priv);
+ priv->dmaops->enable_txirq(priv);
+ spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
+ return rxcomplete + txcomplete;
+}
+
+/* DMA TX & RX FIFO interrupt routing
+ */
+static irqreturn_t altera_isr(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct altera_tse_private *priv;
+ unsigned long flags;
+
+ if (unlikely(!dev)) {
+ pr_err("%s: invalid dev pointer\n", __func__);
+ return IRQ_NONE;
+ }
+ priv = netdev_priv(dev);
+
+ /* turn off desc irqs and enable napi rx */
+ spin_lock_irqsave(&priv->rxdma_irq_lock, flags);
+
+ if (likely(napi_schedule_prep(&priv->napi))) {
+ priv->dmaops->disable_rxirq(priv);
+ priv->dmaops->disable_txirq(priv);
+ __napi_schedule(&priv->napi);
+ }
+
+ /* reset IRQs */
+ priv->dmaops->clear_rxirq(priv);
+ priv->dmaops->clear_txirq(priv);
+
+ spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/* Transmit a packet (called by the kernel). Dispatches either the
+ * SGDMA or the MSGDMA transmit method. Neither path supports
+ * scatter/gather, so the driver assumes the frame is a single
+ * physically contiguous fragment starting at skb->data, with
+ * length skb_headlen(skb).
+ */
+static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ unsigned int txsize = priv->tx_ring_size;
+ unsigned int entry;
+ struct tse_buffer *buffer = NULL;
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ unsigned int nopaged_len = skb_headlen(skb);
+ enum netdev_tx ret = NETDEV_TX_OK;
+ dma_addr_t dma_addr;
+ int txcomplete = 0;
+
+ spin_lock_bh(&priv->tx_lock);
+
+ if (unlikely(tse_tx_avail(priv) < nfrags + 1)) {
+ if (!netif_queue_stopped(dev)) {
+ netif_stop_queue(dev);
+ /* This is a hard error, log it. */
+ netdev_err(priv->dev,
+ "%s: Tx list full when queue awake\n",
+ __func__);
+ }
+ ret = NETDEV_TX_BUSY;
+ goto out;
+ }
+
+ /* Map the first skb fragment */
+ entry = priv->tx_prod % txsize;
+ buffer = &priv->tx_ring[entry];
+
+ dma_addr = dma_map_single(priv->device, skb->data, nopaged_len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(priv->device, dma_addr)) {
+ netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
+ dev_kfree_skb_any(skb);
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
+
+ buffer->skb = skb;
+ buffer->dma_addr = dma_addr;
+ buffer->len = nopaged_len;
+
+ /* Push data out of the cache hierarchy into main memory */
+ dma_sync_single_for_device(priv->device, buffer->dma_addr,
+ buffer->len, DMA_TO_DEVICE);
+
+ txcomplete = priv->dmaops->tx_buffer(priv, buffer);
+
+ skb_tx_timestamp(skb);
+
+ priv->tx_prod++;
+ dev->stats.tx_bytes += skb->len;
+
+ if (unlikely(tse_tx_avail(priv) <= TXQUEUESTOP_THRESHOLD)) {
+ if (netif_msg_hw(priv))
+ netdev_dbg(priv->dev, "%s: stop transmitted packets\n",
+ __func__);
+ netif_stop_queue(dev);
+ }
+
+out:
+ spin_unlock_bh(&priv->tx_lock);
+
+ return ret;
+}
+
+/* Called every time the controller might need to be made
+ * aware of new link state. The PHY code conveys this
+ * information through variables in the phydev structure, and this
+ * function converts those variables into the appropriate
+ * register values, and can bring down the device if needed.
+ */
+static void altera_tse_adjust_link(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct phy_device *phydev = priv->phydev;
+ int new_state = 0;
+
+ /* only change config if there is a link */
+ spin_lock(&priv->mac_cfg_lock);
+ if (phydev->link) {
+ /* Read old config */
+ u32 cfg_reg = ioread32(&priv->mac_dev->command_config);
+
+ /* Check duplex */
+ if (phydev->duplex != priv->oldduplex) {
+ new_state = 1;
+ if (!(phydev->duplex))
+ cfg_reg |= MAC_CMDCFG_HD_ENA;
+ else
+ cfg_reg &= ~MAC_CMDCFG_HD_ENA;
+
+ netdev_dbg(priv->dev, "%s: Link duplex = 0x%x\n",
+ dev->name, phydev->duplex);
+
+ priv->oldduplex = phydev->duplex;
+ }
+
+ /* Check speed */
+ if (phydev->speed != priv->oldspeed) {
+ new_state = 1;
+ switch (phydev->speed) {
+ case 1000:
+ cfg_reg |= MAC_CMDCFG_ETH_SPEED;
+ cfg_reg &= ~MAC_CMDCFG_ENA_10;
+ break;
+ case 100:
+ cfg_reg &= ~MAC_CMDCFG_ETH_SPEED;
+ cfg_reg &= ~MAC_CMDCFG_ENA_10;
+ break;
+ case 10:
+ cfg_reg &= ~MAC_CMDCFG_ETH_SPEED;
+ cfg_reg |= MAC_CMDCFG_ENA_10;
+ break;
+ default:
+ if (netif_msg_link(priv))
+ netdev_warn(dev, "Speed (%d) is not 10/100/1000!\n",
+ phydev->speed);
+ break;
+ }
+ priv->oldspeed = phydev->speed;
+ }
+ iowrite32(cfg_reg, &priv->mac_dev->command_config);
+
+ if (!priv->oldlink) {
+ new_state = 1;
+ priv->oldlink = 1;
+ }
+ } else if (priv->oldlink) {
+ new_state = 1;
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+ }
+
+ if (new_state && netif_msg_link(priv))
+ phy_print_status(phydev);
+
+ spin_unlock(&priv->mac_cfg_lock);
+}
+
+static struct phy_device *connect_local_phy(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct phy_device *phydev = NULL;
+ char phy_id_fmt[MII_BUS_ID_SIZE + 3];
+ int ret;
+
+ if (priv->phy_addr != POLL_PHY) {
+ snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
+ priv->mdio->id, priv->phy_addr);
+
+ netdev_dbg(dev, "trying to attach to %s\n", phy_id_fmt);
+
+ phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link,
+ priv->phy_iface);
+ if (IS_ERR(phydev))
+ netdev_err(dev, "Could not attach to PHY\n");
+
+ } else {
+ phydev = phy_find_first(priv->mdio);
+ if (phydev == NULL) {
+ netdev_err(dev, "No PHY found\n");
+ return phydev;
+ }
+
+ ret = phy_connect_direct(dev, phydev, &altera_tse_adjust_link,
+ priv->phy_iface);
+ if (ret != 0) {
+ netdev_err(dev, "Could not attach to PHY\n");
+ phydev = NULL;
+ }
+ }
+ return phydev;
+}
+
+/* Initialize driver's PHY state, and attach to the PHY
+ */
+static int init_phy(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct phy_device *phydev;
+ struct device_node *phynode;
+
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+
+ phynode = of_parse_phandle(priv->device->of_node, "phy-handle", 0);
+
+ if (!phynode) {
+ netdev_dbg(dev, "no phy-handle found\n");
+ if (!priv->mdio) {
+ netdev_err(dev,
+ "No phy-handle nor local mdio specified\n");
+ return -ENODEV;
+ }
+ phydev = connect_local_phy(dev);
+ } else {
+ netdev_dbg(dev, "phy-handle found\n");
+ phydev = of_phy_connect(dev, phynode,
+ &altera_tse_adjust_link, 0, priv->phy_iface);
+ }
+
+ if (!phydev) {
+ netdev_err(dev, "Could not find the PHY\n");
+ return -ENODEV;
+ }
+
+ /* Stop advertising 1000BASE-T capability if interface is not GMII
+ * Note: Checkpatch throws CHECKs for the camel case defines below,
+ * it's ok to ignore.
+ */
+ if ((priv->phy_iface == PHY_INTERFACE_MODE_MII) ||
+ (priv->phy_iface == PHY_INTERFACE_MODE_RMII))
+ phydev->advertising &= ~(SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full);
+
+ /* Broken HW is sometimes missing the pull-up resistor on the
+ * MDIO line, which results in reads to non-existent devices returning
+ * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent
+ * device as well.
+ * Note: phydev->phy_id is the result of reading the UID PHY registers.
+ */
+ if (phydev->phy_id == 0) {
+ netdev_err(dev, "Bad PHY UID 0x%08x\n", phydev->phy_id);
+ phy_disconnect(phydev);
+ return -ENODEV;
+ }
+
+ netdev_dbg(dev, "attached to PHY %d UID 0x%08x Link = %d\n",
+ phydev->addr, phydev->phy_id, phydev->link);
+
+ priv->phydev = phydev;
+ return 0;
+}
+
+static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr)
+{
+ struct altera_tse_mac *mac = priv->mac_dev;
+ u32 msb;
+ u32 lsb;
+
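+ /* mac_addr_0 holds octets 0-3 with octet 0 in the least
+ * significant byte; mac_addr_1 holds octets 4-5 in its low
+ * half-word.
+ */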
+ msb = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0];
+ lsb = ((addr[5] << 8) | addr[4]) & 0xffff;
+
+ /* Set primary MAC address */
+ iowrite32(msb, &mac->mac_addr_0);
+ iowrite32(lsb, &mac->mac_addr_1);
+}
+
+/* MAC software reset.
+ * When reset is triggered, the MAC function completes the current
+ * transmission or reception, and subsequently disables the transmit and
+ * receive logic, flushes the receive FIFO buffer, and resets the statistics
+ * counters.
+ */
+static int reset_mac(struct altera_tse_private *priv)
+{
+ void __iomem *cmd_cfg_reg = &priv->mac_dev->command_config;
+ int counter;
+ u32 dat;
+
+ dat = ioread32(cmd_cfg_reg);
+ dat &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA);
+ dat |= MAC_CMDCFG_SW_RESET | MAC_CMDCFG_CNT_RESET;
+ iowrite32(dat, cmd_cfg_reg);
+
+ counter = 0;
+ while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) {
+ if (tse_bit_is_clear(cmd_cfg_reg, MAC_CMDCFG_SW_RESET))
+ break;
+ udelay(1);
+ }
+
+ if (counter >= ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) {
+ dat = ioread32(cmd_cfg_reg);
+ dat &= ~MAC_CMDCFG_SW_RESET;
+ iowrite32(dat, cmd_cfg_reg);
+ return -1;
+ }
+ return 0;
+}
+
+/* Initialize MAC core registers
+ */
+static int init_mac(struct altera_tse_private *priv)
+{
+ struct altera_tse_mac *mac = priv->mac_dev;
+ unsigned int cmd = 0;
+ u32 frm_length;
+
+ /* Setup Rx FIFO */
+ iowrite32(priv->rx_fifo_depth - ALTERA_TSE_RX_SECTION_EMPTY,
+ &mac->rx_section_empty);
+ iowrite32(ALTERA_TSE_RX_SECTION_FULL, &mac->rx_section_full);
+ iowrite32(ALTERA_TSE_RX_ALMOST_EMPTY, &mac->rx_almost_empty);
+ iowrite32(ALTERA_TSE_RX_ALMOST_FULL, &mac->rx_almost_full);
+
+ /* Setup Tx FIFO */
+ iowrite32(priv->tx_fifo_depth - ALTERA_TSE_TX_SECTION_EMPTY,
+ &mac->tx_section_empty);
+ iowrite32(ALTERA_TSE_TX_SECTION_FULL, &mac->tx_section_full);
+ iowrite32(ALTERA_TSE_TX_ALMOST_EMPTY, &mac->tx_almost_empty);
+ iowrite32(ALTERA_TSE_TX_ALMOST_FULL, &mac->tx_almost_full);
+
+ /* MAC Address Configuration */
+ tse_update_mac_addr(priv, priv->dev->dev_addr);
+
+ /* MAC Function Configuration */
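+ /* On-wire frame length = 14-byte Ethernet header + MTU bytes of
+ * payload + 4-byte FCS
+ */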
+ frm_length = ETH_HLEN + priv->dev->mtu + ETH_FCS_LEN;
+ iowrite32(frm_length, &mac->frm_length);
+ iowrite32(ALTERA_TSE_TX_IPG_LENGTH, &mac->tx_ipg_length);
+
+ /* Disable RX/TX shift 16 for alignment of all received frames on 16-bit
+ * start address
+ */
+ tse_clear_bit(&mac->rx_cmd_stat, ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16);
+ tse_clear_bit(&mac->tx_cmd_stat, ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 |
+ ALTERA_TSE_TX_CMD_STAT_OMIT_CRC);
+
+ /* Set the MAC options */
+ cmd = ioread32(&mac->command_config);
+ cmd |= MAC_CMDCFG_PAD_EN; /* Padding Removal on Receive */
+ cmd &= ~MAC_CMDCFG_CRC_FWD; /* CRC Removal */
+ cmd |= MAC_CMDCFG_RX_ERR_DISC; /* Automatically discard frames
+ * with CRC errors
+ */
+ cmd |= MAC_CMDCFG_CNTL_FRM_ENA;
+ cmd &= ~MAC_CMDCFG_TX_ENA;
+ cmd &= ~MAC_CMDCFG_RX_ENA;
+ iowrite32(cmd, &mac->command_config);
+
+ if (netif_msg_hw(priv))
+ dev_dbg(priv->device,
+ "MAC post-initialization: CMD_CONFIG = 0x%08x\n", cmd);
+
+ return 0;
+}
+
+/* Start/stop MAC transmission logic
+ */
+static void tse_set_mac(struct altera_tse_private *priv, bool enable)
+{
+ struct altera_tse_mac *mac = priv->mac_dev;
+ u32 value = ioread32(&mac->command_config);
+
+ if (enable)
+ value |= MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA;
+ else
+ value &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA);
+
+ iowrite32(value, &mac->command_config);
+}
+
+/* Change the MTU
+ */
+static int tse_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ unsigned int max_mtu = priv->max_mtu;
+ unsigned int min_mtu = ETH_ZLEN + ETH_FCS_LEN;
+
+ if (netif_running(dev)) {
+ netdev_err(dev, "must be stopped to change its MTU\n");
+ return -EBUSY;
+ }
+
+ if ((new_mtu < min_mtu) || (new_mtu > max_mtu)) {
+ netdev_err(dev, "invalid MTU, max MTU is: %u\n", max_mtu);
+ return -EINVAL;
+ }
+
+ dev->mtu = new_mtu;
+ netdev_update_features(dev);
+
+ return 0;
+}
+
+static void altera_tse_set_mcfilter(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct altera_tse_mac *mac = (struct altera_tse_mac *)priv->mac_dev;
+ int i;
+ struct netdev_hw_addr *ha;
+
+ /* clear the hash filter */
+ for (i = 0; i < 64; i++)
+ iowrite32(0, &(mac->hash_table[i]));
+
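+ /* Fold each multicast address to a 6-bit hash: every octet is
+ * reduced to its parity (XOR of its eight bits) and the six
+ * parity bits are concatenated, octet 5 first. Writing 1 to the
+ * indexed hash_table entry makes the MAC accept that bucket.
+ */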
+ netdev_for_each_mc_addr(ha, dev) {
+ unsigned int hash = 0;
+ int mac_octet;
+
+ for (mac_octet = 5; mac_octet >= 0; mac_octet--) {
+ unsigned char xor_bit = 0;
+ unsigned char octet = ha->addr[mac_octet];
+ unsigned int bitshift;
+
+ for (bitshift = 0; bitshift < 8; bitshift++)
+ xor_bit ^= ((octet >> bitshift) & 0x01);
+
+ hash = (hash << 1) | xor_bit;
+ }
+ iowrite32(1, &(mac->hash_table[hash]));
+ }
+}
+
+static void altera_tse_set_mcfilterall(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct altera_tse_mac *mac = (struct altera_tse_mac *)priv->mac_dev;
+ int i;
+
+ /* set the hash filter */
+ for (i = 0; i < 64; i++)
+ iowrite32(1, &(mac->hash_table[i]));
+}
+
+/* Set or clear the multicast filter for this adaptor
+ */
+static void tse_set_rx_mode_hashfilter(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct altera_tse_mac *mac = priv->mac_dev;
+
+ spin_lock(&priv->mac_cfg_lock);
+
+ if (dev->flags & IFF_PROMISC)
+ tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN);
+
+ if (dev->flags & IFF_ALLMULTI)
+ altera_tse_set_mcfilterall(dev);
+ else
+ altera_tse_set_mcfilter(dev);
+
+ spin_unlock(&priv->mac_cfg_lock);
+}
+
+/* Set or clear the multicast filter for this adaptor
+ */
+static void tse_set_rx_mode(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ struct altera_tse_mac *mac = priv->mac_dev;
+
+ spin_lock(&priv->mac_cfg_lock);
+
+ if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
+ !netdev_mc_empty(dev) || !netdev_uc_empty(dev))
+ tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN);
+ else
+ tse_clear_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN);
+
+ spin_unlock(&priv->mac_cfg_lock);
+}
+
+/* Open and initialize the interface
+ */
+static int tse_open(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ int ret = 0;
+ int i;
+ unsigned long flags;
+
+ /* Reset and configure TSE MAC and probe associated PHY */
+ ret = priv->dmaops->init_dma(priv);
+ if (ret != 0) {
+ netdev_err(dev, "Cannot initialize DMA\n");
+ goto phy_error;
+ }
+
+ if (netif_msg_ifup(priv))
+ netdev_info(dev, "device MAC address %pM\n",
+ dev->dev_addr);
+
+ if ((priv->revision < 0xd00) || (priv->revision > 0xe00))
+ netdev_warn(dev, "TSE revision %x\n", priv->revision);
+
+ spin_lock(&priv->mac_cfg_lock);
+ ret = reset_mac(priv);
+ if (ret)
+ netdev_err(dev, "Cannot reset MAC core (error: %d)\n", ret);
+
+ ret = init_mac(priv);
+ spin_unlock(&priv->mac_cfg_lock);
+ if (ret) {
+ netdev_err(dev, "Cannot init MAC core (error: %d)\n", ret);
+ goto alloc_skbuf_error;
+ }
+
+ priv->dmaops->reset_dma(priv);
+
+ /* Create and initialize the TX/RX descriptors chains. */
+ priv->rx_ring_size = dma_rx_num;
+ priv->tx_ring_size = dma_tx_num;
+ ret = alloc_init_skbufs(priv);
+ if (ret) {
+ netdev_err(dev, "DMA descriptors initialization failed\n");
+ goto alloc_skbuf_error;
+ }
+
+ /* Register RX interrupt */
+ ret = request_irq(priv->rx_irq, altera_isr, IRQF_SHARED,
+ dev->name, dev);
+ if (ret) {
+ netdev_err(dev, "Unable to register RX interrupt %d\n",
+ priv->rx_irq);
+ goto init_error;
+ }
+
+ /* Register TX interrupt */
+ ret = request_irq(priv->tx_irq, altera_isr, IRQF_SHARED,
+ dev->name, dev);
+ if (ret) {
+ netdev_err(dev, "Unable to register TX interrupt %d\n",
+ priv->tx_irq);
+ goto tx_request_irq_error;
+ }
+
+ /* Enable DMA interrupts */
+ spin_lock_irqsave(&priv->rxdma_irq_lock, flags);
+ priv->dmaops->enable_rxirq(priv);
+ priv->dmaops->enable_txirq(priv);
+
+ /* Setup RX descriptor chain */
+ for (i = 0; i < priv->rx_ring_size; i++)
+ priv->dmaops->add_rx_desc(priv, &priv->rx_ring[i]);
+
+ spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
+
+ /* Start MAC Rx/Tx */
+ spin_lock(&priv->mac_cfg_lock);
+ tse_set_mac(priv, true);
+ spin_unlock(&priv->mac_cfg_lock);
+
+ if (priv->phydev)
+ phy_start(priv->phydev);
+
+ napi_enable(&priv->napi);
+ netif_start_queue(dev);
+
+ return 0;
+
+tx_request_irq_error:
+ free_irq(priv->rx_irq, dev);
+init_error:
+ free_skbufs(dev);
+alloc_skbuf_error:
+ if (priv->phydev) {
+ phy_disconnect(priv->phydev);
+ priv->phydev = NULL;
+ }
+phy_error:
+ return ret;
+}
+
+/* Stop TSE MAC interface and put the device in an inactive state
+ */
+static int tse_shutdown(struct net_device *dev)
+{
+ struct altera_tse_private *priv = netdev_priv(dev);
+ int ret;
+ unsigned long flags;
+
+ /* Stop and disconnect the PHY */
+ if (priv->phydev) {
+ phy_stop(priv->phydev);
+ phy_disconnect(priv->phydev);
+ priv->phydev = NULL;
+ }
+
+ netif_stop_queue(dev);
+ napi_disable(&priv->napi);
+
+ /* Disable DMA interrupts */
+ spin_lock_irqsave(&priv->rxdma_irq_lock, flags);
+ priv->dmaops->disable_rxirq(priv);
+ priv->dmaops->disable_txirq(priv);
+ spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
+
+ /* Free the IRQ lines */
+ free_irq(priv->rx_irq, dev);
+ free_irq(priv->tx_irq, dev);
+
+ /* Disable and reset the MAC; this also flushes the receive FIFO */
+ spin_lock(&priv->mac_cfg_lock);
+ spin_lock(&priv->tx_lock);
+
+ ret = reset_mac(priv);
+ if (ret)
+ netdev_err(dev, "Cannot reset MAC core (error: %d)\n", ret);
+ priv->dmaops->reset_dma(priv);
+ free_skbufs(dev);
+
+ spin_unlock(&priv->tx_lock);
+ spin_unlock(&priv->mac_cfg_lock);
+
+ priv->dmaops->uninit_dma(priv);
+
+ return 0;
+}
+
+static struct net_device_ops altera_tse_netdev_ops = {
+ .ndo_open = tse_open,
+ .ndo_stop = tse_shutdown,
+ .ndo_start_xmit = tse_start_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_rx_mode = tse_set_rx_mode,
+ .ndo_change_mtu = tse_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
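+/* Look up a named MMIO resource on the platform device, reserve the
+ * region and ioremap it, returning both the resource and the mapping.
+ */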
+static int request_and_map(struct platform_device *pdev, const char *name,
+ struct resource **res, void __iomem **ptr)
+{
+ struct resource *region;
+ struct device *device = &pdev->dev;
+
+ *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+ if (*res == NULL) {
+ dev_err(device, "resource %s not defined\n", name);
+ return -ENODEV;
+ }
+
+ region = devm_request_mem_region(device, (*res)->start,
+ resource_size(*res), dev_name(device));
+ if (region == NULL) {
+ dev_err(device, "unable to request %s\n", name);
+ return -EBUSY;
+ }
+
+ *ptr = devm_ioremap_nocache(device, region->start,
+ resource_size(region));
+ if (*ptr == NULL) {
+ dev_err(device, "ioremap_nocache of %s failed!", name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/* Probe Altera TSE MAC device
+ */
+static int altera_tse_probe(struct platform_device *pdev)
+{
+ struct net_device *ndev;
+ int ret = -ENODEV;
+ struct resource *control_port;
+ struct resource *dma_res;
+ struct altera_tse_private *priv;
+ const unsigned char *macaddr;
+ struct device_node *np = pdev->dev.of_node;
+ void __iomem *descmap;
+ const struct of_device_id *of_id = NULL;
+
+ ndev = alloc_etherdev(sizeof(struct altera_tse_private));
+ if (!ndev) {
+ dev_err(&pdev->dev, "Could not allocate network device\n");
+ return -ENODEV;
+ }
+
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ priv = netdev_priv(ndev);
+ priv->device = &pdev->dev;
+ priv->dev = ndev;
+ priv->msg_enable = netif_msg_init(debug, default_msg_level);
+
+ of_id = of_match_device(altera_tse_ids, &pdev->dev);
+
+ if (of_id)
+ priv->dmaops = (struct altera_dmaops *)of_id->data;
+
+ if (priv->dmaops &&
+ priv->dmaops->altera_dtype == ALTERA_DTYPE_SGDMA) {
+ /* Get the mapped address to the SGDMA descriptor memory */
+ ret = request_and_map(pdev, "s1", &dma_res, &descmap);
+ if (ret)
+ goto out_free;
+
+ /* Start of that memory is for transmit descriptors */
+ priv->tx_dma_desc = descmap;
+
+ /* First half is for tx descriptors, other half for rx */
+ priv->txdescmem = resource_size(dma_res)/2;
+
+ priv->txdescmem_busaddr = (dma_addr_t)dma_res->start;
+
+ priv->rx_dma_desc = (void __iomem *)((uintptr_t)(descmap +
+ priv->txdescmem));
+ priv->rxdescmem = resource_size(dma_res)/2;
+ priv->rxdescmem_busaddr = dma_res->start;
+ priv->rxdescmem_busaddr += priv->txdescmem;
+
+ if (upper_32_bits(priv->rxdescmem_busaddr)) {
+ dev_dbg(priv->device,
+ "SGDMA bus addresses greater than 32-bits\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+ if (upper_32_bits(priv->txdescmem_busaddr)) {
+ dev_dbg(priv->device,
+ "SGDMA bus addresses greater than 32-bits\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+ } else if (priv->dmaops &&
+ priv->dmaops->altera_dtype == ALTERA_DTYPE_MSGDMA) {
+ ret = request_and_map(pdev, "rx_resp", &dma_res,
+ &priv->rx_dma_resp);
+ if (ret)
+ goto out_free;
+
+ ret = request_and_map(pdev, "tx_desc", &dma_res,
+ &priv->tx_dma_desc);
+ if (ret)
+ goto out_free;
+
+ priv->txdescmem = resource_size(dma_res);
+ priv->txdescmem_busaddr = dma_res->start;
+
+ ret = request_and_map(pdev, "rx_desc", &dma_res,
+ &priv->rx_dma_desc);
+ if (ret)
+ goto out_free;
+
+ priv->rxdescmem = resource_size(dma_res);
+ priv->rxdescmem_busaddr = dma_res->start;
+
+ } else {
+ goto out_free;
+ }
+
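+ /* Try the DMA mask advertised by the selected DMA variant first,
+ * then fall back to a 32-bit mask before giving up.
+ */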
+ if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) {
+ dma_set_coherent_mask(priv->device,
+ DMA_BIT_MASK(priv->dmaops->dmamask));
+ } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) {
+ dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32));
+ } else {
+ ret = -EIO;
+ goto out_free;
+ }
+
+ /* MAC address space */
+ ret = request_and_map(pdev, "control_port", &control_port,
+ (void __iomem **)&priv->mac_dev);
+ if (ret)
+ goto out_free;
+
+ /* xSGDMA Rx Dispatcher address space */
+ ret = request_and_map(pdev, "rx_csr", &dma_res,
+ &priv->rx_dma_csr);
+ if (ret)
+ goto out_free;
+
+ /* xSGDMA Tx Dispatcher address space */
+ ret = request_and_map(pdev, "tx_csr", &dma_res,
+ &priv->tx_dma_csr);
+ if (ret)
+ goto out_free;
+
+ /* Rx IRQ */
+ priv->rx_irq = platform_get_irq_byname(pdev, "rx_irq");
+ if (priv->rx_irq == -ENXIO) {
+ dev_err(&pdev->dev, "cannot obtain Rx IRQ\n");
+ ret = -ENXIO;
+ goto out_free;
+ }
+
+ /* Tx IRQ */
+ priv->tx_irq = platform_get_irq_byname(pdev, "tx_irq");
+ if (priv->tx_irq == -ENXIO) {
+ dev_err(&pdev->dev, "cannot obtain Tx IRQ\n");
+ ret = -ENXIO;
+ goto out_free;
+ }
+
+ /* get FIFO depths from device tree */
+ if (of_property_read_u32(pdev->dev.of_node, "rx-fifo-depth",
+ &priv->rx_fifo_depth)) {
+ dev_err(&pdev->dev, "cannot obtain rx-fifo-depth\n");
+ ret = -ENXIO;
+ goto out_free;
+ }
+
+ if (of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
+ &priv->rx_fifo_depth)) {
+ dev_err(&pdev->dev, "cannot obtain tx-fifo-depth\n");
+ ret = -ENXIO;
+ goto out_free;
+ }
+
+ /* get hash filter settings for this instance */
+ priv->hash_filter =
+ of_property_read_bool(pdev->dev.of_node,
+ "altr,has-hash-multicast-filter");
+
+ /* get supplemental address settings for this instance */
+ priv->added_unicast =
+ of_property_read_bool(pdev->dev.of_node,
+ "altr,has-supplementary-unicast");
+
+ /* Max MTU is 1500, ETH_DATA_LEN */
+ priv->max_mtu = ETH_DATA_LEN;
+
+ /* Get the max mtu from the device tree. Note that the
+ * "max-frame-size" parameter is actually max mtu. Definition
+ * in the ePAPR v1.1 spec and usage differ, so go with usage.
+ */
+ of_property_read_u32(pdev->dev.of_node, "max-frame-size",
+ &priv->max_mtu);
+
+ /* The DMA buffer size already accounts for an alignment bias
+ * to avoid unaligned access exceptions for the NIOS processor.
+ */
+ priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE;
+
+ /* get default MAC address from device tree */
+ macaddr = of_get_mac_address(pdev->dev.of_node);
+ if (macaddr)
+ ether_addr_copy(ndev->dev_addr, macaddr);
+ else
+ eth_hw_addr_random(ndev);
+
+ priv->phy_iface = of_get_phy_mode(np);
+
+ /* try to get PHY address from device tree, use PHY autodetection if
+ * no valid address is given
+ */
+ if (of_property_read_u32(pdev->dev.of_node, "phy-addr",
+ &priv->phy_addr)) {
+ priv->phy_addr = POLL_PHY;
+ }
+
+ if (!((priv->phy_addr == POLL_PHY) ||
+ ((priv->phy_addr >= 0) && (priv->phy_addr < PHY_MAX_ADDR)))) {
+ dev_err(&pdev->dev, "invalid phy-addr specified %d\n",
+ priv->phy_addr);
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ /* Create/attach to MDIO bus */
+ ret = altera_tse_mdio_create(ndev,
+ atomic_add_return(1, &instance_count));
+
+ if (ret)
+ goto out_free;
+
+ /* initialize netdev */
+ ether_setup(ndev);
+ ndev->mem_start = control_port->start;
+ ndev->mem_end = control_port->end;
+ ndev->netdev_ops = &altera_tse_netdev_ops;
+ altera_tse_set_ethtool_ops(ndev);
+
+ altera_tse_netdev_ops.ndo_set_rx_mode = tse_set_rx_mode;
+
+ if (priv->hash_filter)
+ altera_tse_netdev_ops.ndo_set_rx_mode =
+ tse_set_rx_mode_hashfilter;
+
+ /* Scatter/gather IO is not supported,
+ * so it is turned off
+ */
+ ndev->hw_features &= ~NETIF_F_SG;
+ ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
+
+ /* VLAN offloading of tagging, stripping and filtering is not
+ * supported by the hardware, but the driver will accommodate the
+ * extra 4-byte VLAN tag for processing by upper layers.
+ */
+ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+ /* setup NAPI interface */
+ netif_napi_add(ndev, &priv->napi, tse_poll, NAPI_POLL_WEIGHT);
+
+ spin_lock_init(&priv->mac_cfg_lock);
+ spin_lock_init(&priv->tx_lock);
+ spin_lock_init(&priv->rxdma_irq_lock);
+
+ ret = register_netdev(ndev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register TSE net device\n");
+ goto out_free_mdio;
+ }
+
+ platform_set_drvdata(pdev, ndev);
+
+ priv->revision = ioread32(&priv->mac_dev->megacore_revision);
+
+ if (netif_msg_probe(priv))
+ dev_info(&pdev->dev, "Altera TSE MAC version %d.%d at 0x%08lx irq %d/%d\n",
+ (priv->revision >> 8) & 0xff,
+ priv->revision & 0xff,
+ (unsigned long) control_port->start, priv->rx_irq,
+ priv->tx_irq);
+
+ ret = init_phy(ndev);
+ if (ret != 0) {
+ netdev_err(ndev, "Cannot attach to PHY (error: %d)\n", ret);
+ goto out_free_mdio;
+ }
+ return 0;
+
+out_free_mdio:
+ altera_tse_mdio_destroy(ndev);
+out_free:
+ free_netdev(ndev);
+ return ret;
+}
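
/* For reference: a minimal sketch of what the request_and_map() helper
 * used throughout the probe above could look like. The real helper is
 * defined elsewhere in this driver; the devres calls below are standard
 * kernel APIs, but this body is an illustration, not a quote.
 */
static int request_and_map_sketch(struct platform_device *pdev,
				  const char *name, struct resource **res,
				  void __iomem **ptr)
{
	struct device *device = &pdev->dev;

	/* Look up the named register region from reg/reg-names */
	*res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
	if (*res == NULL) {
		dev_err(device, "resource %s not defined\n", name);
		return -ENODEV;
	}

	/* Claim the region so no other driver maps it */
	if (!devm_request_mem_region(device, (*res)->start,
				     resource_size(*res), name)) {
		dev_err(device, "unable to request %s\n", name);
		return -EBUSY;
	}

	/* Map it into the kernel's virtual address space */
	*ptr = devm_ioremap(device, (*res)->start, resource_size(*res));
	if (*ptr == NULL) {
		dev_err(device, "ioremap of %s failed\n", name);
		return -ENOMEM;
	}

	return 0;
}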
+
+/* Remove Altera TSE MAC device */
+static int altera_tse_remove(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+
+ unregister_netdev(ndev);
+ altera_tse_mdio_destroy(ndev);
+ platform_set_drvdata(pdev, NULL);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+struct altera_dmaops altera_dtype_sgdma = {
+ .altera_dtype = ALTERA_DTYPE_SGDMA,
+ .dmamask = 32,
+ .reset_dma = sgdma_reset,
+ .enable_txirq = sgdma_enable_txirq,
+ .enable_rxirq = sgdma_enable_rxirq,
+ .disable_txirq = sgdma_disable_txirq,
+ .disable_rxirq = sgdma_disable_rxirq,
+ .clear_txirq = sgdma_clear_txirq,
+ .clear_rxirq = sgdma_clear_rxirq,
+ .tx_buffer = sgdma_tx_buffer,
+ .tx_completions = sgdma_tx_completions,
+ .add_rx_desc = sgdma_add_rx_desc,
+ .get_rx_status = sgdma_rx_status,
+ .init_dma = sgdma_initialize,
+ .uninit_dma = sgdma_uninitialize,
+};
+
+struct altera_dmaops altera_dtype_msgdma = {
+ .altera_dtype = ALTERA_DTYPE_MSGDMA,
+ .dmamask = 64,
+ .reset_dma = msgdma_reset,
+ .enable_txirq = msgdma_enable_txirq,
+ .enable_rxirq = msgdma_enable_rxirq,
+ .disable_txirq = msgdma_disable_txirq,
+ .disable_rxirq = msgdma_disable_rxirq,
+ .clear_txirq = msgdma_clear_txirq,
+ .clear_rxirq = msgdma_clear_rxirq,
+ .tx_buffer = msgdma_tx_buffer,
+ .tx_completions = msgdma_tx_completions,
+ .add_rx_desc = msgdma_add_rx_desc,
+ .get_rx_status = msgdma_rx_status,
+ .init_dma = msgdma_initialize,
+ .uninit_dma = msgdma_uninitialize,
+};
+
+static const struct of_device_id altera_tse_ids[] = {
+ { .compatible = "altr,tse-msgdma-1.0", .data = &altera_dtype_msgdma, },
+ { .compatible = "altr,tse-1.0", .data = &altera_dtype_sgdma, },
+ { .compatible = "ALTR,tse-1.0", .data = &altera_dtype_sgdma, },
+ {},
+};
+MODULE_DEVICE_TABLE(of, altera_tse_ids);
+
+static struct platform_driver altera_tse_driver = {
+ .probe = altera_tse_probe,
+ .remove = altera_tse_remove,
+ .driver = {
+ .name = ALTERA_TSE_RESOURCE_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = altera_tse_ids,
+ },
+};
+
+module_platform_driver(altera_tse_driver);
+
+MODULE_AUTHOR("Altera Corporation");
+MODULE_DESCRIPTION("Altera Triple Speed Ethernet MAC driver");
+MODULE_LICENSE("GPL v2");
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "altera_tse.h"
+#include "altera_utils.h"
+
+void tse_set_bit(void __iomem *ioaddr, u32 bit_mask)
+{
+ u32 value = ioread32(ioaddr);
+ value |= bit_mask;
+ iowrite32(value, ioaddr);
+}
+
+void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask)
+{
+ u32 value = ioread32(ioaddr);
+ value &= ~bit_mask;
+ iowrite32(value, ioaddr);
+}
+
+int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask)
+{
+ u32 value = ioread32(ioaddr);
+ return (value & bit_mask) ? 1 : 0;
+}
+
+int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask)
+{
+ u32 value = ioread32(ioaddr);
+ return (value & bit_mask) ? 0 : 1;
+}
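
/* Usage sketch for the helpers above: they perform a non-atomic
 * read-modify-write on a memory-mapped register, so callers must
 * serialize access themselves (the main driver holds mac_cfg_lock
 * around MAC register updates). The register and bit names below are
 * illustrative, not a quote of the driver:
 *
 *	spin_lock(&priv->mac_cfg_lock);
 *	tse_set_bit(&mac->command_config, MAC_CMDCFG_TX_ENA);
 *	if (tse_bit_is_set(&mac->command_config, MAC_CMDCFG_SW_RESET))
 *		;	// wait for the reset bit to self-clear
 *	spin_unlock(&priv->mac_cfg_lock);
 */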
--- /dev/null
+/* Altera TSE SGDMA and MSGDMA Linux driver
+ * Copyright (C) 2014 Altera Corporation. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ALTERA_UTILS_H__
+#define __ALTERA_UTILS_H__
+
+#include <linux/kernel.h>
+
+void tse_set_bit(void __iomem *ioaddr, u32 bit_mask);
+void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask);
+int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask);
+int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask);
+
+#endif /* __ALTERA_UTILS_H__*/
MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
static const struct dev_pm_ops e1000_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
.suspend = e1000e_pm_suspend,
.resume = e1000e_pm_resume,
.freeze = e1000e_pm_freeze,
.thaw = e1000e_pm_thaw,
.poweroff = e1000e_pm_suspend,
.restore = e1000e_pm_resume,
+#endif
SET_RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume,
e1000e_pm_runtime_idle)
};
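
/* The #ifdef above compiles the sleep callbacks out when CONFIG_PM_SLEEP
 * is unset, while SET_RUNTIME_PM_OPS() handles its own guarding. Where a
 * driver does not need distinct freeze/thaw handlers, the same effect is
 * usually spelled with the system-sleep helper macro. A sketch under
 * assumed example_* callbacks, not e1000e's actual code:
 */
static int example_suspend(struct device *dev) { return 0; }
static int example_resume(struct device *dev) { return 0; }
static int example_rt_suspend(struct device *dev) { return 0; }
static int example_rt_resume(struct device *dev) { return 0; }

static const struct dev_pm_ops example_pm_ops = {
	/* expands to suspend/freeze/poweroff and resume/thaw/restore */
	SET_SYSTEM_SLEEP_PM_OPS(example_suspend, example_resume)
	SET_RUNTIME_PM_OPS(example_rt_suspend, example_rt_resume, NULL)
};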
np = netdev_priv(netdev);
np->vsi = vsi;
- netdev->hw_enc_features = NETIF_F_IP_CSUM |
+ netdev->hw_enc_features |= NETIF_F_IP_CSUM |
NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_TSO |
- NETIF_F_SG;
+ NETIF_F_TSO;
netdev->features = NETIF_F_SG |
NETIF_F_IP_CSUM |
}
-/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+/* The i40evf_ptype_lookup table is used to convert from the 8-bit ptype in the
* hardware to a bit-field that can be used by SW to more easily determine the
* packet type.
*
*
* Typical work flow:
*
- * IF NOT i40e_ptype_lookup[ptype].known
+ * IF NOT i40evf_ptype_lookup[ptype].known
* THEN
* Packet is unknown
- * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ * ELSE IF i40evf_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
* Use the rest of the fields to look at the tunnels, inner protocols, etc
* ELSE
* Use the enum i40e_rx_l2_ptype to decode the packet type
#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC
/* Lookup table mapping the HW PTYPE to the bit field for decoding */
-struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+struct i40e_rx_ptype_decoded i40evf_ptype_lookup[] = {
/* L2 Packet types */
I40E_PTT_UNUSED_ENTRY(0),
I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
i40e_status i40e_set_mac_type(struct i40e_hw *hw);
-extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+extern struct i40e_rx_ptype_decoded i40evf_ptype_lookup[];
static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
{
- return i40e_ptype_lookup[ptype];
+ return i40evf_ptype_lookup[ptype];
}
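
/* The typical work flow described above, expressed as code. This is a
 * sketch: decode_rx_desc_ptype() is the inline defined above, and the
 * field and enum names (known, outer_ip, tunnel_type,
 * I40E_RX_PTYPE_OUTER_IP, I40E_RX_PTYPE_TUNNEL_NONE) are assumed to
 * follow the i40e/i40evf type definitions.
 */
static inline bool example_ptype_is_tunneled(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

	/* IF NOT known THEN the packet type is unknown */
	if (!decoded.known)
		return false;

	/* ELSE IF outer_ip == I40E_RX_PTYPE_OUTER_IP, the tunnel and
	 * inner-protocol fields are meaningful
	 */
	return decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	       decoded.tunnel_type != I40E_RX_PTYPE_TUNNEL_NONE;
}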
/* prototype for functions used for SW locks */
* were determined based on theoretical maximum wire speed and testing
* data, in order to minimize response time while increasing bulk
* throughput.
- * This functionality is controlled by the InterruptThrottleRate module
- * parameter (see igb_param.c)
+ * This functionality is controlled by ethtool's coalescing settings.
* NOTE: This function is called only when operating in a multiqueue
* receive environment.
**/
* based on theoretical maximum wire speed and thresholds were set based
* on testing data as well as attempting to minimize response time
* while increasing bulk throughput.
- * this functionality is controlled by the InterruptThrottleRate module
- * parameter (see igb_param.c)
+ * This functionality is controlled by ethtool's coalescing settings.
* NOTE: These calculations are only valid when operating in a single-
* queue environment.
**/
value = 0x0;
break;
default:
- pr_warn("PHY interface mode was not setup. Set to MII.\n");
+ netdev_warn(ndev,
+ "PHY interface mode was not setup. Set to MII.\n");
value = 0x1;
break;
}
cnt--;
}
if (cnt <= 0) {
- pr_err("Device reset failed\n");
+ netdev_err(ndev, "Device reset failed\n");
ret = -ETIMEDOUT;
}
return ret;
/* Unused write back interrupt */
if (intr_status & EESR_TABT) { /* Transmit Abort int */
ndev->stats.tx_aborted_errors++;
- if (netif_msg_tx_err(mdp))
- dev_err(&ndev->dev, "Transmit Abort\n");
+ netif_err(mdp, tx_err, ndev, "Transmit Abort\n");
}
}
if (intr_status & EESR_RFRMER) {
/* Receive Frame Overflow int */
ndev->stats.rx_frame_errors++;
- if (netif_msg_rx_err(mdp))
- dev_err(&ndev->dev, "Receive Abort\n");
+ netif_err(mdp, rx_err, ndev, "Receive Abort\n");
}
}
if (intr_status & EESR_TDE) {
/* Transmit Descriptor Empty int */
ndev->stats.tx_fifo_errors++;
- if (netif_msg_tx_err(mdp))
- dev_err(&ndev->dev, "Transmit Descriptor Empty\n");
+ netif_err(mdp, tx_err, ndev, "Transmit Descriptor Empty\n");
}
if (intr_status & EESR_TFE) {
/* FIFO under flow */
ndev->stats.tx_fifo_errors++;
- if (netif_msg_tx_err(mdp))
- dev_err(&ndev->dev, "Transmit FIFO Under flow\n");
+ netif_err(mdp, tx_err, ndev, "Transmit FIFO Under flow\n");
}
if (intr_status & EESR_RDE) {
/* Receive Descriptor Empty int */
ndev->stats.rx_over_errors++;
-
- if (netif_msg_rx_err(mdp))
- dev_err(&ndev->dev, "Receive Descriptor Empty\n");
+ netif_err(mdp, rx_err, ndev, "Receive Descriptor Empty\n");
}
if (intr_status & EESR_RFE) {
/* Receive FIFO Overflow int */
ndev->stats.rx_fifo_errors++;
- if (netif_msg_rx_err(mdp))
- dev_err(&ndev->dev, "Receive FIFO Overflow\n");
+ netif_err(mdp, rx_err, ndev, "Receive FIFO Overflow\n");
}
if (!mdp->cd->no_ade && (intr_status & EESR_ADE)) {
/* Address Error */
ndev->stats.tx_fifo_errors++;
- if (netif_msg_tx_err(mdp))
- dev_err(&ndev->dev, "Address Error\n");
+ netif_err(mdp, tx_err, ndev, "Address Error\n");
}
mask = EESR_TWB | EESR_TABT | EESR_ADE | EESR_TDE | EESR_TFE;
u32 edtrr = sh_eth_read(ndev, EDTRR);
/* dmesg */
- dev_err(&ndev->dev, "TX error. status=%8.8x cur_tx=%8.8x dirty_tx=%8.8x state=%8.8x EDTRR=%8.8x.\n",
- intr_status, mdp->cur_tx, mdp->dirty_tx,
- (u32)ndev->state, edtrr);
+ netdev_err(ndev, "TX error. status=%8.8x cur_tx=%8.8x dirty_tx=%8.8x state=%8.8x EDTRR=%8.8x.\n",
+ intr_status, mdp->cur_tx, mdp->dirty_tx,
+ (u32)ndev->state, edtrr);
/* dirty buffer free */
sh_eth_txfree(ndev);
EESIPR);
__napi_schedule(&mdp->napi);
} else {
- dev_warn(&ndev->dev,
- "ignoring interrupt, status 0x%08lx, mask 0x%08lx.\n",
- intr_status, intr_enable);
+ netdev_warn(ndev,
+ "ignoring interrupt, status 0x%08lx, mask 0x%08lx.\n",
+ intr_status, intr_enable);
}
}
}
if (IS_ERR(phydev)) {
- dev_err(&ndev->dev, "failed to connect PHY\n");
+ netdev_err(ndev, "failed to connect PHY\n");
return PTR_ERR(phydev);
}
- dev_info(&ndev->dev, "attached PHY %d (IRQ %d) to driver %s\n",
- phydev->addr, phydev->irq, phydev->drv->name);
+ netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n",
+ phydev->addr, phydev->irq, phydev->drv->name);
mdp->phydev = phydev;
ret = sh_eth_ring_init(ndev);
if (ret < 0) {
- dev_err(&ndev->dev, "%s: sh_eth_ring_init failed.\n", __func__);
+ netdev_err(ndev, "%s: sh_eth_ring_init failed.\n", __func__);
return ret;
}
ret = sh_eth_dev_init(ndev, false);
if (ret < 0) {
- dev_err(&ndev->dev, "%s: sh_eth_dev_init failed.\n", __func__);
+ netdev_err(ndev, "%s: sh_eth_dev_init failed.\n", __func__);
return ret;
}
ret = request_irq(ndev->irq, sh_eth_interrupt,
mdp->cd->irq_flags, ndev->name, ndev);
if (ret) {
- dev_err(&ndev->dev, "Can not assign IRQ number\n");
+ netdev_err(ndev, "Can not assign IRQ number\n");
goto out_napi_off;
}
netif_stop_queue(ndev);
- if (netif_msg_timer(mdp)) {
- dev_err(&ndev->dev, "%s: transmit timed out, status %8.8x, resetting...\n",
- ndev->name, (int)sh_eth_read(ndev, EESR));
- }
+ netif_err(mdp, timer, ndev,
+ "transmit timed out, status %8.8x, resetting...\n",
+ (int)sh_eth_read(ndev, EESR));
/* tx_errors count up */
ndev->stats.tx_errors++;
spin_lock_irqsave(&mdp->lock, flags);
if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) {
if (!sh_eth_txfree(ndev)) {
- if (netif_msg_tx_queued(mdp))
- dev_warn(&ndev->dev, "TxFD exhausted.\n");
+ netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n");
netif_stop_queue(ndev);
spin_unlock_irqrestore(&mdp->lock, flags);
return NETDEV_TX_BUSY;
udelay(10);
timeout--;
if (timeout <= 0) {
- dev_err(&ndev->dev, "%s: timeout\n", __func__);
+ netdev_err(ndev, "%s: timeout\n", __func__);
return -ETIMEDOUT;
}
}
reg_offset = sh_eth_offset_fast_sh3_sh2;
break;
default:
- pr_err("Unknown register type (%d)\n", register_type);
break;
}
mdp->cd = (struct sh_eth_cpu_data *)match->data;
}
mdp->reg_offset = sh_eth_get_register_offset(mdp->cd->register_type);
+ if (!mdp->reg_offset) {
+ dev_err(&pdev->dev, "Unknown register type (%d)\n",
+ mdp->cd->register_type);
+ ret = -EINVAL;
+ goto out_release;
+ }
sh_eth_set_default_cpu_data(mdp->cd);
/* set function */
}
/* print device information */
- pr_info("Base address at 0x%x, %pM, IRQ %d.\n",
- (u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
+ netdev_info(ndev, "Base address at 0x%x, %pM, IRQ %d.\n",
+ (u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
platform_set_drvdata(pdev, ndev);
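
/* The sh_eth conversions above replace the open-coded
 * "if (netif_msg_*(mdp)) dev_err(...)" pattern with the netif_err()
 * family, which prints only when the matching bit is set in the
 * driver's msg_enable mask. Roughly (a sketch of the pattern, not the
 * exact definition in netdevice.h):
 *
 *	#define netif_err(priv, type, dev, fmt, args...)	\
 *		do {						\
 *			if (netif_msg_##type(priv))		\
 *				netdev_err(dev, fmt, ##args);	\
 *		} while (0)
 */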
/* The chip-specific entries in the device structure. */
dev->netdev_ops = &rhine_netdev_ops;
- dev->ethtool_ops = &netdev_ethtool_ops,
+ dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
netif_napi_add(dev, &rp->napi, rhine_napipoll, 64);
#include <linux/spi/spi.h>
#include <linux/spi/at86rf230.h>
#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
#include <net/mac802154.h>
#include <net/wpan-phy.h>
if (rc)
return rc;
- rc = at86rf230_state(dev, STATE_FORCE_TX_ON);
+ rc = at86rf230_state(dev, STATE_TX_ON);
if (rc)
return rc;
status &= ~IRQ_TRX_UR; /* FIXME: possibly handle ???*/
if (status & IRQ_TRX_END) {
- spin_lock_irqsave(&lp->lock, flags);
status &= ~IRQ_TRX_END;
+ spin_lock_irqsave(&lp->lock, flags);
if (lp->is_tx) {
lp->is_tx = 0;
spin_unlock_irqrestore(&lp->lock, flags);
return 0;
}
+static struct at86rf230_platform_data *
+at86rf230_get_pdata(struct spi_device *spi)
+{
+ struct at86rf230_platform_data *pdata;
+ const char *irq_type;
+
+ if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node)
+ return spi->dev.platform_data;
+
+ pdata = devm_kzalloc(&spi->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ goto done;
+
+ pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0);
+ pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0);
+
+ pdata->irq_type = IRQF_TRIGGER_RISING;
+ if (of_property_read_string(spi->dev.of_node, "irq-type", &irq_type))
+ irq_type = "edge-rising";
+ if (!strcmp(irq_type, "level-high"))
+ pdata->irq_type = IRQF_TRIGGER_HIGH;
+ else if (!strcmp(irq_type, "level-low"))
+ pdata->irq_type = IRQF_TRIGGER_LOW;
+ else if (!strcmp(irq_type, "edge-rising"))
+ pdata->irq_type = IRQF_TRIGGER_RISING;
+ else if (!strcmp(irq_type, "edge-falling"))
+ pdata->irq_type = IRQF_TRIGGER_FALLING;
+ else
+ dev_warn(&spi->dev, "wrong irq-type specified, using edge-rising\n");
+
+ spi->dev.platform_data = pdata;
+done:
+ return pdata;
+}
+
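/* On boards without device tree, the same settings still reach the
 * driver through spi->dev.platform_data registered from board code.
 * A sketch (the GPIO numbers are placeholders):
 */
static struct at86rf230_platform_data example_at86rf230_pdata = {
	.rstn		= 102,	/* RSTN gpio line */
	.slp_tr		= 103,	/* SLP_TR gpio line */
	.irq_type	= IRQF_TRIGGER_RISING,
};
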
static int at86rf230_probe(struct spi_device *spi)
{
struct at86rf230_platform_data *pdata;
return -EINVAL;
}
- pdata = spi->dev.platform_data;
+ pdata = at86rf230_get_pdata(spi);
if (!pdata) {
dev_err(&spi->dev, "no platform_data\n");
return -EINVAL;
}
- rc = gpio_request(pdata->rstn, "rstn");
- if (rc)
- return rc;
+ if (gpio_is_valid(pdata->rstn)) {
+ rc = gpio_request(pdata->rstn, "rstn");
+ if (rc)
+ return rc;
+ }
if (gpio_is_valid(pdata->slp_tr)) {
rc = gpio_request(pdata->slp_tr, "slp_tr");
goto err_slp_tr;
}
- rc = gpio_direction_output(pdata->rstn, 1);
- if (rc)
- goto err_gpio_dir;
+ if (gpio_is_valid(pdata->rstn)) {
+ rc = gpio_direction_output(pdata->rstn, 1);
+ if (rc)
+ goto err_gpio_dir;
+ }
if (gpio_is_valid(pdata->slp_tr)) {
rc = gpio_direction_output(pdata->slp_tr, 0);
}
/* Reset */
- msleep(1);
- gpio_set_value(pdata->rstn, 0);
- msleep(1);
- gpio_set_value(pdata->rstn, 1);
- msleep(1);
+ if (gpio_is_valid(pdata->rstn)) {
+ udelay(1);
+ gpio_set_value(pdata->rstn, 0);
+ udelay(1);
+ gpio_set_value(pdata->rstn, 1);
+ usleep_range(120, 240);
+ }
rc = __at86rf230_detect_device(spi, &man_id, &part, &version);
if (rc < 0)
if (gpio_is_valid(pdata->slp_tr))
gpio_free(pdata->slp_tr);
err_slp_tr:
- gpio_free(pdata->rstn);
+ if (gpio_is_valid(pdata->rstn))
+ gpio_free(pdata->rstn);
return rc;
}
if (gpio_is_valid(pdata->slp_tr))
gpio_free(pdata->slp_tr);
- gpio_free(pdata->rstn);
+ if (gpio_is_valid(pdata->rstn))
+ gpio_free(pdata->rstn);
mutex_destroy(&lp->bmux);
ieee802154_free_device(lp->dev);
return 0;
}
+#if IS_ENABLED(CONFIG_OF)
+static const struct of_device_id at86rf230_of_match[] = {
+ { .compatible = "atmel,at86rf230", },
+ { .compatible = "atmel,at86rf231", },
+ { .compatible = "atmel,at86rf233", },
+ { .compatible = "atmel,at86rf212", },
+ { },
+};
+#endif
+
static struct spi_driver at86rf230_driver = {
.driver = {
+ .of_match_table = of_match_ptr(at86rf230_of_match),
.name = "at86rf230",
.owner = THIS_MODULE,
},
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_restart(struct net_device *dev);
-#ifdef CONFIG_NETPOLL_TRAP
-int netpoll_trap(void);
-#endif
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
{
-#ifdef CONFIG_NETPOLL_TRAP
- if (netpoll_trap()) {
- netif_tx_start_queue(dev_queue);
- return;
- }
-#endif
if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
__netif_schedule(dev_queue->qdisc);
}
static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
{
struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-#ifdef CONFIG_NETPOLL_TRAP
- if (netpoll_trap())
- return;
-#endif
netif_tx_stop_queue(txq);
}
static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
{
struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-#ifdef CONFIG_NETPOLL_TRAP
- if (netpoll_trap())
- return;
-#endif
if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
__netif_schedule(txq->qdisc);
}
IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
IPSET_TYPE_IFACE_FLAG = 5,
IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
- IPSET_TYPE_NOMATCH_FLAG = 6,
+ IPSET_TYPE_MARK_FLAG = 6,
+ IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
+ IPSET_TYPE_NOMATCH_FLAG = 7,
IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
/* Strictly speaking not a feature, but a flag for dumping:
* this settype must be dumped last */
- IPSET_DUMP_LAST_FLAG = 7,
+ IPSET_DUMP_LAST_FLAG = 8,
IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
};
#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
/* Extension id, in size order */
enum ip_set_ext_id {
char name[IPSET_MAXNAMELEN];
/* Protocol version */
u8 protocol;
- /* Set features to control swapping */
- u8 features;
/* Set type dimension */
u8 dimension;
/*
u8 family;
/* Type revisions */
u8 revision_min, revision_max;
+ /* Set features to control swapping */
+ u16 features;
/* Create set */
int (*create)(struct net *net, struct ip_set *set,
u8 revision;
/* Extensions */
u8 extensions;
+ /* Create flags */
+ u8 flags;
/* Default timeout value, if enabled */
u32 timeout;
/* Element data size */
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
if (!cadt_flags)
return 0;
void nfnl_lock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_nfnl_is_held(__u8 subsys_id);
+#else
+static inline int lockdep_nfnl_is_held(__u8 subsys_id)
+{
+ return 1;
+}
+#endif /* CONFIG_PROVE_LOCKING */
+
+/*
+ * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex
+ *
+ * @p: The pointer to read, prior to dereferencing
+ * @ss: The nfnetlink subsystem ID
+ *
+ * Return the value of the specified RCU-protected pointer, but omit
+ * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ * the caller holds the NFNL subsystem mutex.
+ */
+#define nfnl_dereference(p, ss) \
+ rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
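
/* Usage sketch: fetch an RCU-protected pointer while holding the
 * nfnetlink subsystem mutex taken via nfnl_lock(). "some_rcu_ptr" is a
 * hypothetical RCU-protected pointer, not a real kernel symbol:
 *
 *	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 *	obj = nfnl_dereference(some_rcu_ptr, NFNL_SUBSYS_NFTABLES);
 *	...
 *	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 */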
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
struct net_device *dev;
char dev_name[IFNAMSIZ];
const char *name;
- void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb,
- int offset, int len);
union inet_addr local_ip, remote_ip;
bool ipv6;
u16 local_port, remote_port;
u8 remote_mac[ETH_ALEN];
- struct list_head rx; /* rx_np list element */
struct work_struct cleanup_work;
};
struct netpoll_info {
atomic_t refcnt;
- unsigned long rx_flags;
- spinlock_t rx_lock;
struct semaphore dev_lock;
- struct list_head rx_np; /* netpolls that registered an rx_skb_hook */
- struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
struct sk_buff_head txq;
struct delayed_work tx_work;
int netpoll_parse_options(struct netpoll *np, char *opt);
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp);
int netpoll_setup(struct netpoll *np);
-int netpoll_trap(void);
-void netpoll_set_trap(int trap);
void __netpoll_cleanup(struct netpoll *np);
void __netpoll_free_async(struct netpoll *np);
void netpoll_cleanup(struct netpoll *np);
-int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo);
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev);
static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
local_irq_restore(flags);
}
-
-
#ifdef CONFIG_NETPOLL
-static inline bool netpoll_rx_on(struct sk_buff *skb)
-{
- struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
-
- return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
-}
-
-static inline bool netpoll_rx(struct sk_buff *skb)
-{
- struct netpoll_info *npinfo;
- unsigned long flags;
- bool ret = false;
-
- local_irq_save(flags);
-
- if (!netpoll_rx_on(skb))
- goto out;
-
- npinfo = rcu_dereference_bh(skb->dev->npinfo);
- spin_lock(&npinfo->rx_lock);
- /* check rx_flags again with the lock held */
- if (npinfo->rx_flags && __netpoll_rx(skb, npinfo))
- ret = true;
- spin_unlock(&npinfo->rx_lock);
-
-out:
- local_irq_restore(flags);
- return ret;
-}
-
-static inline int netpoll_receive_skb(struct sk_buff *skb)
-{
- if (!list_empty(&skb->dev->napi_list))
- return netpoll_rx(skb);
- return 0;
-}
-
static inline void *netpoll_poll_lock(struct napi_struct *napi)
{
struct net_device *dev = napi->dev;
}
#else
-static inline bool netpoll_rx(struct sk_buff *skb)
-{
- return false;
-}
-static inline bool netpoll_rx_on(struct sk_buff *skb)
-{
- return false;
-}
-static inline int netpoll_receive_skb(struct sk_buff *skb)
-{
- return 0;
-}
static inline void *netpoll_poll_lock(struct napi_struct *napi)
{
return NULL;
struct nf_conn {
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
- plus 1 for any connection(s) we are `master' for */
+ * plus 1 for any connection(s) we are `master' for
+ *
+ * Hint: SKBs reference this struct, and take a refcnt on it, via
+ * skb->nfct and the helpers nf_conntrack_get() and nf_conntrack_put().
+ * nf_ct_put() is equivalent to nf_conntrack_put() in that it drops
+ * the refcnt; beware that nf_ct_get() is different and does not
+ * take a refcnt.
+ */
struct nf_conntrack ct_general;
- spinlock_t lock;
+ spinlock_t lock;
+ u16 cpu;
/* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto);
-extern spinlock_t nf_conntrack_lock ;
+#ifdef CONFIG_LOCKDEP
+# define CONNTRACK_LOCKS 8
+#else
+# define CONNTRACK_LOCKS 1024
+#endif
+extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+
+extern spinlock_t nf_conntrack_expect_lock;
#endif /* _NF_CONNTRACK_CORE_H */
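
/* Usage sketch: with the single nf_conntrack_lock split into an array,
 * a hash bucket's lock is picked by reducing the bucket hash modulo
 * CONNTRACK_LOCKS (illustrative, not a quote of the conntrack core):
 *
 *	spin_lock(&nf_conntrack_locks[hash % CONNTRACK_LOCKS]);
 *	... insert/remove in net->ct.hash[hash] ...
 *	spin_unlock(&nf_conntrack_locks[hash % CONNTRACK_LOCKS]);
 *
 * The smaller array under CONFIG_LOCKDEP keeps lockdep's per-lock-class
 * bookkeeping manageable.
 */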
#include <uapi/linux/netfilter/xt_connlabel.h>
+#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
+
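/* Assuming the uapi value XT_CONNLABEL_MAXBIT == 127, the new constant
 * works out to (127 + 1) / 8 == 16 bytes, i.e. room for 128 label bits
 * in the bits[] array of struct nf_conn_labels below.
 */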
struct nf_conn_labels {
u8 words;
unsigned long bits[];
u8 words;
words = ACCESS_ONCE(net->ct.label_words);
- if (words == 0 || WARN_ON_ONCE(words > 8))
+ if (words == 0)
return NULL;
cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
#include <linux/list.h>
#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netlink.h>
int (*init)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
- void (*destroy)(const struct nft_expr *expr);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx,
* @handle: rule handle
* @genmask: generation mask
* @dlen: length of expression data
+ * @ulen: length of user data (used for comments)
* @data: expression data
*/
struct nft_rule {
struct list_head list;
- u64 handle:46,
+ u64 handle:42,
genmask:2,
- dlen:16;
+ dlen:12,
+ ulen:8;
unsigned char data[]
__attribute__((aligned(__alignof__(struct nft_expr))));
};
* struct nft_rule_trans - nf_tables rule update in transaction
*
* @list: used internally
+ * @ctx: rule context
* @rule: rule that needs to be updated
- * @chain: chain that this rule belongs to
- * @table: table for which this chain applies
- * @nlh: netlink header of the message that contain this update
- * @family: family expressesed as AF_*
*/
struct nft_rule_trans {
struct list_head list;
+ struct nft_ctx ctx;
struct nft_rule *rule;
- const struct nft_chain *chain;
- const struct nft_table *table;
- const struct nlmsghdr *nlh;
- u8 family;
};
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen];
}
+static inline void *nft_userdata(const struct nft_rule *rule)
+{
+ return (void *)&rule->data[rule->dlen];
+}
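
/* Memory layout sketch implied by the accessors above: a struct nft_rule
 * is followed by dlen bytes of expressions and then ulen bytes of user
 * data (nft_expr_last() stops at &data[dlen], where nft_userdata()
 * begins). The 42/2/12/8 bitfield split keeps the whole header word
 * 64 bits wide:
 *
 *	+-----------------+--------------------+------------------+
 *	| struct nft_rule | expressions (dlen) | userdata (ulen)  |
 *	+-----------------+--------------------+------------------+
 *	^ rule             ^ rule->data[0]      ^ nft_userdata(rule)
 */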
+
/*
* The last pointer isn't really necessary, but the compiler isn't able to
* determine that the result of nft_expr_last() is always the same since it
int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *);
+#define nft_dereference(p) \
+ nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
+
#define MODULE_ALIAS_NFT_FAMILY(family) \
MODULE_ALIAS("nft-afinfo-" __stringify(family))
#include <linux/list_nulls.h>
#include <linux/atomic.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/seqlock.h>
struct ctl_table_header;
struct nf_conntrack_ecache;
#endif
};
+struct ct_pcpu {
+ spinlock_t lock;
+ struct hlist_nulls_head unconfirmed;
+ struct hlist_nulls_head dying;
+ struct hlist_nulls_head tmpl;
+};
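
/* Usage sketch: the unconfirmed/dying/tmpl lists are now per-cpu, so a
 * conntrack entry is linked into the lists of the cpu recorded in
 * ct->cpu, e.g. (illustrative; field names follow this patch and
 * nf_conntrack.h):
 *
 *	struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, ct->cpu);
 *
 *	spin_lock(&pcpu->lock);
 *	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 *			     &pcpu->unconfirmed);
 *	spin_unlock(&pcpu->lock);
 */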
+
struct netns_ct {
atomic_t count;
unsigned int expect_count;
int sysctl_checksum;
unsigned int htable_size;
+ seqcount_t generation;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash;
struct hlist_head *expect_hash;
- struct hlist_nulls_head unconfirmed;
- struct hlist_nulls_head dying;
- struct hlist_nulls_head tmpl;
+ struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
u8 dying;
u8 proto;
u32 seq;
- struct xfrm_filter *filter;
+ struct xfrm_address_filter *filter;
};
/* Full description of state of transformer. */
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+struct xfrm_input_afinfo {
+ unsigned int family;
+ struct module *owner;
+ int (*callback)(struct sk_buff *skb, u8 protocol,
+ int err);
+};
+
+int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo);
+int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo);
+
void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
int priority;
};
-/* XFRM tunnel handlers. */
-struct xfrm_tunnel {
+struct xfrm6_protocol {
int (*handler)(struct sk_buff *skb);
- int (*err_handler)(struct sk_buff *skb, u32 info);
+ int (*cb_handler)(struct sk_buff *skb, int err);
+ int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info);
- struct xfrm_tunnel __rcu *next;
+ struct xfrm6_protocol __rcu *next;
int priority;
};
-struct xfrm_tunnel_notifier {
+/* XFRM tunnel handlers. */
+struct xfrm_tunnel {
int (*handler)(struct sk_buff *skb);
- struct xfrm_tunnel_notifier __rcu *next;
+ int (*err_handler)(struct sk_buff *skb, u32 info);
+
+ struct xfrm_tunnel __rcu *next;
int priority;
};
int xfrm_state_init(struct net *net);
void xfrm_state_fini(struct net *net);
void xfrm4_state_init(void);
+void xfrm4_protocol_init(void);
#ifdef CONFIG_XFRM
int xfrm6_init(void);
void xfrm6_fini(void);
int xfrm6_state_init(void);
void xfrm6_state_fini(void);
+int xfrm6_protocol_init(void);
+void xfrm6_protocol_fini(void);
#else
static inline int xfrm6_init(void)
{
#endif
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
- struct xfrm_filter *filter);
+ struct xfrm_address_filter *filter);
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*), void *);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler);
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler);
int xfrm6_extract_header(struct sk_buff *skb);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
void xfrm6_local_error(struct sk_buff *skb, u32 mtu);
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
+int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol);
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family);
__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
return ret;
}
-static inline int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family,
- u8 protocol, int err)
-{
- switch(family) {
-#ifdef CONFIG_INET
- case AF_INET:
- return xfrm4_rcv_cb(skb, protocol, err);
-#endif
- }
- return 0;
-}
-
static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
unsigned int family)
{
IPSET_ATTR_PROTO, /* 7 */
IPSET_ATTR_CADT_FLAGS, /* 8 */
IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */
+ IPSET_ATTR_MARK, /* 10 */
+ IPSET_ATTR_MARKMASK, /* 11 */
/* Reserve empty slots */
IPSET_ATTR_CADT_MAX = 16,
/* Create-only specific attributes */
IPSET_ERR_IPADDR_IPV6,
IPSET_ERR_COUNTER,
IPSET_ERR_COMMENT,
+ IPSET_ERR_INVALID_MARKMASK,
/* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352,
IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
IPSET_FLAG_BIT_WITH_COMMENT = 4,
IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
+ IPSET_FLAG_BIT_WITH_FORCEADD = 5,
+ IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
IPSET_FLAG_CADT_MAX = 15,
};
+/* The flag bits which correspond to the non-extension create flags */
+enum ipset_create_flags {
+ IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
+ IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
+ IPSET_CREATE_FLAG_BIT_MAX = 7,
+};
+
/* Commands with settype-specific attributes */
enum ipset_adt {
IPSET_ADD,
#ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H
-#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_USERDATA_MAXLEN 256
enum nft_registers {
NFT_REG_VERDICT,
* @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
* @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
* @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
+ * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
*/
enum nft_rule_attributes {
NFTA_RULE_UNSPEC,
NFTA_RULE_EXPRESSIONS,
NFTA_RULE_COMPAT,
NFTA_RULE_POSITION,
+ NFTA_RULE_USERDATA,
__NFTA_RULE_MAX
};
#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
NFT_CT_PROTOCOL,
NFT_CT_PROTO_SRC,
NFT_CT_PROTO_DST,
+ NFT_CT_LABELS,
};
/**
XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */
XFRMA_SA_EXTRA_FLAGS, /* __u32 */
XFRMA_PROTO, /* __u8 */
- XFRMA_FILTER, /* struct xfrm_filter */
+ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
__be16 new_sport;
};
-struct xfrm_filter {
+struct xfrm_address_filter {
xfrm_address_t saddr;
xfrm_address_t daddr;
__u16 family;
{
int ret;
- /* if netpoll wants it, pretend we never saw it */
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
trace_netif_receive_skb(skb);
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
- goto out;
-
orig_dev = skb->dev;
skb_reset_network_header(skb);
unlock:
rcu_read_unlock();
-out:
return ret;
}
int same_flow;
enum gro_result ret;
- if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+ if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
if (skb_is_gso(skb) || skb_has_frag_list(skb))
static struct sk_buff_head skb_pool;
-static atomic_t trapped;
-
DEFINE_STATIC_SRCU(netpoll_srcu);
#define USEC_PER_POLL 50
-#define NETPOLL_RX_ENABLED 1
-#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(sizeof(struct ethhdr) + \
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);
static unsigned int carrier_timeout = 4;
}
}
-static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, __be32 saddr, __be32 daddr)
-{
- __wsum psum;
-
- if (uh->check == 0 || skb_csum_unnecessary(skb))
- return 0;
-
- psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_fold(csum_add(psum, skb->csum)))
- return 0;
-
- skb->csum = psum;
-
- return __skb_checksum_complete(skb);
-}
-
/*
* Check whether delayed processing was scheduled for our NIC. If so,
* we attempt to grab the poll lock and use ->poll() to pump the card.
* trylock here and interrupts are already disabled in the softirq
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
- *
- * In cases where there is bi-directional communications, reading only
- * one message at a time can lead to packets being dropped by the
- * network adapter, forcing superfluous retries and possibly timeouts.
- * Thus, we set our budget to greater than 1.
*/
-static int poll_one_napi(struct netpoll_info *npinfo,
- struct napi_struct *napi, int budget)
+static int poll_one_napi(struct napi_struct *napi, int budget)
{
int work;
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return budget;
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
set_bit(NAPI_STATE_NPSVC, &napi->state);
work = napi->poll(napi, budget);
+ WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
return budget - work;
}
-static void poll_napi(struct net_device *dev)
+static void poll_napi(struct net_device *dev, int budget)
{
struct napi_struct *napi;
- int budget = 16;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
- napi, budget);
+ budget = poll_one_napi(napi, budget);
spin_unlock(&napi->poll_lock);
-
- if (!budget)
- break;
}
}
}
-static void service_neigh_queue(struct netpoll_info *npi)
-{
- if (npi) {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&npi->neigh_tx)))
- netpoll_neigh_reply(skb, npi);
- }
-}
-
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
+ int budget = 0;
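+ /* A zero budget keeps ->poll() from delivering rx packets here;
+ * with the netpoll receive path removed, polling only needs to
+ * reap tx completions.
+ */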
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev);
+ poll_napi(dev, budget);
up(&ni->dev_lock);
- if (dev->flags & IFF_SLAVE) {
- if (ni) {
- struct net_device *bond_dev;
- struct sk_buff *skb;
- struct netpoll_info *bond_ni;
-
- bond_dev = netdev_master_upper_dev_get_rcu(dev);
- bond_ni = rcu_dereference_bh(bond_dev->npinfo);
- while ((skb = skb_dequeue(&ni->neigh_tx))) {
- skb->dev = bond_dev;
- skb_queue_tail(&bond_ni->neigh_tx, skb);
- }
- }
- }
-
- service_neigh_queue(ni);
-
zap_completion_queue();
}
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int size, type = ARPOP_REPLY;
- __be32 sip, tip;
- unsigned char *sha;
- struct sk_buff *send_skb;
- struct netpoll *np, *tmp;
- unsigned long flags;
- int hlen, tlen;
- int hits = 0, proto;
-
- if (list_empty(&npinfo->rx_np))
- return;
-
- /* Before checking the packet, we do some early
- inspection whether this is interesting at all */
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->dev == skb->dev)
- hits++;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
- /* No netpoll struct is using this dev */
- if (!hits)
- return;
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_ARP) {
- struct arphdr *arp;
- unsigned char *arp_ptr;
- /* No arp on this interface */
- if (skb->dev->flags & IFF_NOARP)
- return;
-
- if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
- return;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- arp = arp_hdr(skb);
-
- if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
- arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_op != htons(ARPOP_REQUEST))
- return;
-
- arp_ptr = (unsigned char *)(arp+1);
- /* save the location of the src hw addr */
- sha = arp_ptr;
- arp_ptr += skb->dev->addr_len;
- memcpy(&sip, arp_ptr, 4);
- arp_ptr += 4;
- /* If we actually cared about dst hw addr,
- it would get copied here */
- arp_ptr += skb->dev->addr_len;
- memcpy(&tip, arp_ptr, 4);
-
- /* Should we ignore arp? */
- if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
- return;
-
- size = arp_hdr_len(skb->dev);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (tip != np->local_ip.ip)
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- skb_reset_network_header(send_skb);
- arp = (struct arphdr *) skb_put(send_skb, size);
- send_skb->dev = skb->dev;
- send_skb->protocol = htons(ETH_P_ARP);
-
- /* Fill the device header for the ARP frame */
- if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
- sha, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- /*
- * Fill out the arp protocol part.
- *
- * we only support ethernet device type,
- * which (according to RFC 1390) should
- * always equal 1 (Ethernet).
- */
-
- arp->ar_hrd = htons(np->dev->type);
- arp->ar_pro = htons(ETH_P_IP);
- arp->ar_hln = np->dev->addr_len;
- arp->ar_pln = 4;
- arp->ar_op = htons(type);
-
- arp_ptr = (unsigned char *)(arp + 1);
- memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &tip, 4);
- arp_ptr += 4;
- memcpy(arp_ptr, sha, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &sip, 4);
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- } else if( proto == ETH_P_IPV6) {
-#if IS_ENABLED(CONFIG_IPV6)
- struct nd_msg *msg;
- u8 *lladdr = NULL;
- struct ipv6hdr *hdr;
- struct icmp6hdr *icmp6h;
- const struct in6_addr *saddr;
- const struct in6_addr *daddr;
- struct inet6_dev *in6_dev = NULL;
- struct in6_addr *target;
-
- in6_dev = in6_dev_get(skb->dev);
- if (!in6_dev || !in6_dev->cnf.accept_ra)
- return;
-
- if (!pskb_may_pull(skb, skb->len))
- return;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
-
- __skb_push(skb, skb->data - skb_transport_header(skb));
-
- if (ipv6_hdr(skb)->hop_limit != 255)
- return;
- if (msg->icmph.icmp6_code != 0)
- return;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return;
-
- saddr = &ipv6_hdr(skb)->saddr;
- daddr = &ipv6_hdr(skb)->daddr;
-
- size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- send_skb->protocol = htons(ETH_P_IPV6);
- send_skb->dev = skb->dev;
-
- skb_reset_network_header(send_skb);
- hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
- *(__be32*)hdr = htonl(0x60000000);
- hdr->payload_len = htons(size);
- hdr->nexthdr = IPPROTO_ICMPV6;
- hdr->hop_limit = 255;
- hdr->saddr = *saddr;
- hdr->daddr = *daddr;
-
- icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
- icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- icmp6h->icmp6_router = 0;
- icmp6h->icmp6_solicited = 1;
-
- target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
- *target = msg->target;
- icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
- IPPROTO_ICMPV6,
- csum_partial(icmp6h,
- size, 0));
-
- if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
- lladdr, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address, we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-#endif
- }
-}
-
-static bool pkt_is_ns(struct sk_buff *skb)
-{
- struct nd_msg *msg;
- struct ipv6hdr *hdr;
-
- if (skb->protocol != htons(ETH_P_ARP))
- return false;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
- return false;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
- __skb_push(skb, skb->data - skb_transport_header(skb));
- hdr = ipv6_hdr(skb);
-
- if (hdr->nexthdr != IPPROTO_ICMPV6)
- return false;
- if (hdr->hop_limit != 255)
- return false;
- if (msg->icmph.icmp6_code != 0)
- return false;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return false;
-
- return true;
-}
-
-int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int proto, len, ulen, data_len;
- int hits = 0, offset;
- const struct iphdr *iph;
- struct udphdr *uh;
- struct netpoll *np, *tmp;
- uint16_t source;
-
- if (list_empty(&npinfo->rx_np))
- goto out;
-
- if (skb->dev->type != ARPHRD_ETHER)
- goto out;
-
- /* check if netpoll clients need ARP */
- if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- }
-
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
- skb = vlan_untag(skb);
- if (unlikely(!skb))
- goto out;
- }
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto != ETH_P_IP && proto != ETH_P_IPV6)
- goto out;
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto out;
- if (skb_shared(skb))
- goto out;
-
- if (proto == ETH_P_IP) {
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (iph->ihl < 5 || iph->version != 4)
- goto out;
- if (!pskb_may_pull(skb, iph->ihl*4))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
- goto out;
-
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < iph->ihl*4)
- goto out;
-
- /*
- * Our transport medium may have padded the buffer out.
- * Now We trim to the true length of the frame.
- */
- if (pskb_trim_rcsum(skb, len))
- goto out;
-
- iph = (struct iphdr *)skb->data;
- if (iph->protocol != IPPROTO_UDP)
- goto out;
-
- len -= iph->ihl*4;
- uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
-
- if (ulen != len)
- goto out;
- if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
- continue;
- if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
- } else {
-#if IS_ENABLED(CONFIG_IPV6)
- const struct ipv6hdr *ip6h;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
- ip6h = (struct ipv6hdr *)skb->data;
- if (ip6h->version != 6)
- goto out;
- len = ntohs(ip6h->payload_len);
- if (!len)
- goto out;
- if (len + sizeof(struct ipv6hdr) > skb->len)
- goto out;
- if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
- goto out;
- ip6h = ipv6_hdr(skb);
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
- uh = udp_hdr(skb);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
- if (ulen != skb->len)
- goto out;
- if (udp6_csum_init(skb, uh, IPPROTO_UDP))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
- continue;
- if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
-#endif
- }
-
- if (!hits)
- goto out;
-
- kfree_skb(skb);
- return 1;
-
-out:
- if (atomic_read(&trapped)) {
- kfree_skb(skb);
- return 1;
- }
-
- return 0;
-}
-
void netpoll_print_options(struct netpoll *np)
{
np_info(np, "local port %d\n", np->local_port);
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
- unsigned long flags;
int err;
np->dev = ndev;
goto out;
}
- npinfo->rx_flags = 0;
- INIT_LIST_HEAD(&npinfo->rx_np);
-
- spin_lock_init(&npinfo->rx_lock);
sema_init(&npinfo->dev_lock, 1);
- skb_queue_head_init(&npinfo->neigh_tx);
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
npinfo->netpoll = np;
- if (np->rx_skb_hook) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_flags |= NETPOLL_RX_ENABLED;
- list_add_tail(&np->rx, &npinfo->rx_np);
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
/* last thing to do is link it to the net device structure */
rcu_assign_pointer(ndev->npinfo, npinfo);
struct netpoll_info *npinfo =
container_of(rcu_head, struct netpoll_info, rcu);
- skb_queue_purge(&npinfo->neigh_tx);
skb_queue_purge(&npinfo->txq);
/* we can't call cancel_delayed_work_sync here, as we are in softirq */
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
- unsigned long flags;
/* rtnl_dereference would be preferable here but
* rcu_cleanup_netpoll path can put us in here safely without
if (!npinfo)
return;
- if (!list_empty(&npinfo->rx_np)) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_del(&np->rx);
- if (list_empty(&npinfo->rx_np))
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
synchronize_srcu(&netpoll_srcu);
if (atomic_dec_and_test(&npinfo->refcnt)) {
rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
-
-int netpoll_trap(void)
-{
- return atomic_read(&trapped);
-}
-EXPORT_SYMBOL(netpoll_trap);
-
-void netpoll_set_trap(int trap)
-{
- if (trap)
- atomic_inc(&trapped);
- else
- atomic_dec(&trapped);
-}
-EXPORT_SYMBOL(netpoll_set_trap);
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst))
- return PTR_ERR(dst);;
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
xfrm4_state_init();
xfrm4_policy_init();
+ xfrm4_protocol_init();
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
#endif
.netns_ok = 1,
};
+static struct xfrm_input_afinfo xfrm4_input_afinfo = {
+ .family = AF_INET,
+ .owner = THIS_MODULE,
+ .callback = xfrm4_rcv_cb,
+};
+
static inline const struct net_protocol *netproto(unsigned char protocol)
{
switch (protocol) {
struct xfrm4_protocol __rcu **pprev;
struct xfrm4_protocol *t;
bool add_netproto = false;
-
int ret = -EEXIST;
int priority = handler->priority;
return ret;
}
EXPORT_SYMBOL(xfrm4_protocol_deregister);
+
+void __init xfrm4_protocol_init(void)
+{
+ xfrm_input_register_afinfo(&xfrm4_input_afinfo);
+}
+EXPORT_SYMBOL(xfrm4_protocol_init);
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
dev_put(dev);
}
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
rcu_read_lock();
-
if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_sw_netstats *tstats;
-
if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
rcu_read_unlock();
goto discard;
goto discard;
}
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb->mark = 0;
- secpath_reset(skb);
- skb->dev = t->dev;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
- return 0;
+
+ return xfrm6_rcv(skb);
}
rcu_read_unlock();
- return 1;
-
+ return -EINVAL;
discard:
kfree_skb(skb);
return 0;
}
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
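
vti6_rcv now does only the cheap part: it finds the tunnel, stashes it in the skb's tunnel control block and feeds the packet to xfrm6_rcv; the policy check, device rebind and statistics moved into vti6_rcv_cb, which the xfrm layer invokes once the transform has actually run. A minimal userspace model of that two-phase contract (plain C, not kernel code; the struct names are invented for illustration):

    /* Model of the split receive path: the handler only stashes
     * per-packet context, the callback does the accounting later. */
    #include <stdio.h>

    struct tunnel { long rx_packets, rx_bytes; };
    struct pkt { struct tunnel *tunnel; int len; };

    static int rcv(struct pkt *p, struct tunnel *t)   /* like vti6_rcv */
    {
    	p->tunnel = t;  /* XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t */
    	return 0;       /* hand the packet to the transform input path */
    }

    static int rcv_cb(struct pkt *p, int err)         /* like vti6_rcv_cb */
    {
    	struct tunnel *t = p->tunnel;

    	if (!t)
    		return 1;   /* not a vti packet; let others look at it */
    	if (err)
    		return 0;   /* transform failed; would bump rx_errors */
    	t->rx_packets++;
    	t->rx_bytes += p->len;
    	return 0;
    }

    int main(void)
    {
    	struct tunnel t = { 0, 0 };
    	struct pkt p = { 0, 100 };

    	rcv(&p, &t);
    	rcv_cb(&p, 0);
    	printf("%ld packets, %ld bytes\n", t.rx_packets, t.rx_bytes);
    	return 0;
    }
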
/**
* vti6_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* If there is no transform, or the transform is not in tunnel
+ * mode, then this tunnel is not functional.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
/**
* vti6_xmit - send a packet
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @fl: the flow information for the xfrm_lookup
**/
-static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct flowi6 fl6;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
int err = -1;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
- return err;
-
- dst = ip6_tnl_dst_check(t);
- if (!dst) {
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- ndst = ip6_route_output(net, NULL, &fl6);
+ if (!dst)
+ goto tx_err_link_failure;
- if (ndst->error)
- goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
- goto tx_err_link_failure;
- }
- dst = ndst;
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
}
- if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
tdev = dst->dev;
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
- skb_dst_drop(skb);
- skb_dst_set_noref(skb, dst);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
- ip6tunnel_xmit(skb, dev);
- if (ndst) {
- dev->mtu = dst_mtu(ndst);
- ip6_tnl_dst_store(t, ndst);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
}
return 0;
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
int ret;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ret = vti6_xmit(skb, dev);
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
+ ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
return NETDEV_TX_OK;
}
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static void vti6_link_config(struct ip6_tnl *t)
{
- struct dst_entry *dst;
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowi6_mark = be32_to_cpu(p->i_key);
- fl6->flowi6_proto = p->proto;
- fl6->flowlabel = 0;
-
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET);
p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
dev->flags &= ~IFF_POINTOPOINT;
dev->iflink = p->link;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
-
- dst = ip6_route_output(dev_net(dev), NULL, fl6);
- if (dst->error)
- return;
-
- dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
- NULL, 0);
- if (IS_ERR(dst))
- return;
-
- if (dst->dev) {
- dev->hard_header_len = dst->dev->hard_header_len;
-
- dev->mtu = dst_mtu(dst);
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- dst_release(dst);
- }
}
/**
t = netdev_priv(dev);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
.fill_info = vti6_fill_info,
};
-static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
- .handler = vti6_rcv,
- .priority = 1,
-};
-
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
int h;
.size = sizeof(struct vti6_net),
};
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
if (err < 0)
goto out_pernet;
- err = xfrm6_mode_tunnel_input_register(&vti6_handler);
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
if (err < 0) {
- pr_err("%s: can't register vti6\n", __func__);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
goto out;
}
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
return 0;
rtnl_link_failed:
- xfrm6_mode_tunnel_input_deregister(&vti6_handler);
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
out:
unregister_pernet_device(&vti6_net_ops);
out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
- pr_info("%s: can't deregister vti6\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
unregister_pernet_device(&vti6_net_ops);
}
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
#include <net/ipv6.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
-
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
-
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
-
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
--- /dev/null
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next))
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
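
The registration above keeps each per-protocol list sorted by descending priority, and the receive wrappers walk that list until a handler returns something other than -EINVAL; this is how the vti handlers (priority 100) get first claim on a packet before the plain ESP/AH/IPcomp handlers (priority 0). A self-contained sketch of just that logic (plain userspace C; the RCU primitives, mutex and per-protocol heads of the real file are collapsed into one list):

    #include <errno.h>
    #include <stdio.h>

    struct handler {
    	struct handler *next;
    	int priority;
    	int (*fn)(int pkt);
    };

    static struct handler *head;

    static int reg(struct handler *h)
    {
    	struct handler **pp;

    	for (pp = &head; *pp; pp = &(*pp)->next) {
    		if ((*pp)->priority < h->priority)
    			break;			/* insert before lower priorities */
    		if ((*pp)->priority == h->priority)
    			return -EEXIST;		/* one handler per priority */
    	}
    	h->next = *pp;
    	*pp = h;
    	return 0;
    }

    static int dispatch(int pkt)
    {
    	struct handler *h;
    	int ret;

    	for (h = head; h; h = h->next)
    		if ((ret = h->fn(pkt)) != -EINVAL)
    			return ret;	/* this handler claimed the packet */
    	return -1;			/* nobody claimed it: icmp error */
    }

    /* vti-style handler claims only "its" packets; base takes the rest */
    static int vti_fn(int pkt)  { return pkt == 1 ? 0 : -EINVAL; }
    static int base_fn(int pkt) { return 0; }

    int main(void)
    {
    	struct handler vti  = { .priority = 100, .fn = vti_fn };
    	struct handler base = { .priority = 0,   .fn = base_fn };

    	reg(&base);
    	reg(&vti);
    	printf("%d %d\n", dispatch(1), dispatch(2)); /* vti first, then fallthrough */
    	return 0;
    }
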
static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
u8 proto;
- struct xfrm_filter *filter = NULL;
+ struct xfrm_address_filter *filter = NULL;
struct pfkey_sock *pfk = pfkey_sk(sk);
if (pfk->dump.dump != NULL)
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_IPMARK
+ tristate "hash:ip,mark set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mark set type support, by which one
+ can store IPv4/IPv6 address and mark pairs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_IPPORT
tristate "hash:ip,port set support"
depends on IP_SET
# hash types
obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
-#define nfnl_dereference(p) \
+#define ip_set_dereference(p) \
rcu_dereference_protected(p, 1)
-#define nfnl_set(inst, id) \
- nfnl_dereference((inst)->ip_set_list)[id]
+#define ip_set(inst, id) \
+ ip_set_dereference((inst)->ip_set_list)[id]
/*
* The set types are implemented in modules and registered set types
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
+ set->flags |= IPSET_CREATE_FLAG_FORCEADD;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set)
__ip_set_get(set);
else
nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set != NULL)
__ip_set_put(set);
}
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL && STREQ(set->name, name)) {
*id = i;
break;
*index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s == NULL) {
if (*index == IPSET_INVALID_ID)
*index = i;
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = nfnl_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst->ip_set_list);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
* Finally! Add our shiny new set to the list, and be done.
*/
pr_debug("create: '%s' created with index %u!\n", set->name, index);
- nfnl_set(inst, index) = set;
+ ip_set(inst, index) = set;
return ret;
static void
ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
{
- struct ip_set *set = nfnl_set(inst, index);
+ struct ip_set *set = ip_set(inst, index);
pr_debug("set: %s\n", set->name);
- nfnl_set(inst, index) = NULL;
+ ip_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_destroy_set(inst, i);
}
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_flush_set(s);
}
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && STREQ(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
- nfnl_set(inst, from_id) = to;
- nfnl_set(inst, to_id) = from;
+ ip_set(inst, from_id) = to;
+ ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
return 0;
struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n",
- nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+ ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
__ip_set_put_byindex(inst,
(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
}
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set == NULL) {
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+ pr_debug("release set %s\n", ip_set(inst, index)->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
find_set_and_id(inst, req_get->set.name, &id);
req_get->set.index = id;
if (id != IPSET_INVALID_ID)
- req_get->family = nfnl_set(inst, id)->family;
+ req_get->family = ip_set(inst, id)->family;
nfnl_unlock(NFNL_SUBSYS_IPSET);
goto copy;
}
goto done;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, req_get->set.index);
+ set = ip_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
return -ENOMEM;
inst->is_deleted = 0;
rcu_assign_pointer(inst->ip_set_list, list);
- pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL)
ip_set_destroy_set(inst, i);
}
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
return ret;
}
+ pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
u32 maxelem; /* max elements in the hash */
u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask; /* mask applied to the mark before storing */
+#endif
struct timer_list gc; /* garbage collection when timeout enabled */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
x->netmask == y->netmask &&
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ x->markmask == y->markmask &&
#endif
a->extensions == b->extensions;
}
bool flag_exist = flags & IPSET_FLAG_EXIST;
u32 key, multi = 0;
+ if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ n = hbucket(t,key);
+ if (n->pos) {
+ /* Choosing the first entry in the array to replace */
+ j = 0;
+ goto reuse_slot;
+ }
+ rcu_read_unlock_bh();
+ }
if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */
mtype_expire(set, h, NLEN(set->family), set->dsize);
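
The new block above implements the forceadd policy: when the set already holds maxelem entries, the element is not rejected; instead the first slot of the bucket it hashes to is overwritten. A toy model of that policy (userspace C; the modulo hash and fixed-size buckets stand in for jhash and the resizable hash table):

    #include <stdio.h>

    #define BUCKETS 4
    #define SLOTS 2

    struct set {
    	unsigned int elems, maxelem;
    	int forceadd;
    	int bucket[BUCKETS][SLOTS];
    	int used[BUCKETS];
    };

    static int set_add(struct set *s, int val)
    {
    	unsigned int b = (unsigned int)val % BUCKETS; /* stand-in for jhash */

    	if (s->elems >= s->maxelem) {
    		if (s->forceadd && s->used[b]) {
    			s->bucket[b][0] = val;	/* reuse the first slot */
    			return 0;
    		}
    		return -1;			/* set full */
    	}
    	if (s->used[b] == SLOTS)
    		return -1;			/* bucket full */
    	s->bucket[b][s->used[b]++] = val;
    	s->elems++;
    	return 0;
    }

    int main(void)
    {
    	struct set s = { .maxelem = 2, .forceadd = 1 };

    	set_add(&s, 0);
    	set_add(&s, 4);				/* same bucket as 0 */
    	printf("%d\n", set_add(&s, 8));		/* full: overwrites slot 0 */
    	printf("bucket0[0]=%d\n", s.bucket[0][0]);
    	return 0;
    }
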
if (h->netmask != HOST_MASK &&
nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
goto nla_put_failure;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
+ goto nla_put_failure;
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
struct nlattr *tb[], u32 flags)
{
u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask;
+#endif
u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask;
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ markmask = 0xffffffff;
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
pr_debug("Create set %s with family %s\n",
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
+#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
return -IPSET_ERR_INVALID_NETMASK;
}
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (tb[IPSET_ATTR_MARKMASK]) {
+ markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+
+ /* markmask is a u32, so the only invalid value is zero */
+ if (markmask == 0)
+ return -IPSET_ERR_INVALID_MARKMASK;
+ }
+#endif
hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ h->markmask = markmask;
#endif
get_random_bytes(&h->initval, sizeof(h->initval));
set->timeout = IPSET_NO_TIMEOUT;
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support */
+/* 2 Comments support */
+#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
--- /dev/null
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mark type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
+IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mark");
+
+/* Type specific function prefix */
+#define HTYPE hash_ipmark
+#define IP_SET_HASH_WITH_MARKMASK
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmark4_elem {
+ __be32 ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
+ const struct hash_ipmark4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ip == ip2->ip &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark4_data_list(struct sk_buff *skb,
+ const struct hash_ipmark4_elem *data)
+{
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static inline void
+hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark4_elem *d)
+{
+ next->ip = d->ip;
+}
+
+#define MTYPE hash_ipmark4
+#define PF 4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip = ntohl(e.ip);
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ } else if (tb[IPSET_ATTR_CIDR]) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (!cidr || cidr > 32)
+ return -IPSET_ERR_INVALID_CIDR;
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; !before(ip_to, ip); ip++) {
+ e.ip = htonl(ip);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipmark6_elem {
+ union nf_inet_addr ip;
+ __u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
+ const struct hash_ipmark6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+ ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark6_data_list(struct sk_buff *skb,
+ const struct hash_ipmark6_elem *data)
+{
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+ nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
+}
+
+static inline void
+hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE hash_ipmark6
+#define PF 6
+#define HOST_MASK 128
+#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+
+static int
+hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ e.mark = skb->mark;
+ e.mark &= h->markmask;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_ipmark *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ tb[IPSET_ATTR_IP_TO] ||
+ tb[IPSET_ATTR_CIDR]))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark &= h->markmask;
+
+ if (adt == IPSET_TEST) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static struct ip_set_type hash_ipmark_type __read_mostly = {
+ .name = "hash:ip,mark",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MARK,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_ipmark_create,
+ .create_policy = {
+ [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_MARK] = { .type = NLA_U32 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_ipmark_init(void)
+{
+ return ip_set_type_register(&hash_ipmark_type);
+}
+
+static void __exit
+hash_ipmark_fini(void)
+{
+ ip_set_type_unregister(&hash_ipmark_type);
+}
+
+module_init(hash_ipmark_init);
+module_exit(hash_ipmark_fini);
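
Both the kadt and uadt paths mask the mark with h->markmask before storing or matching, so marks that differ only in bits outside the mask land on the same element. A short illustration (userspace C; the mask and mark values are invented):

    #include <stdio.h>

    int main(void)
    {
    	unsigned int markmask = 0x0000ff00;
    	unsigned int stored = 0x1234   & markmask; /* element added with mark 0x1234 */
    	unsigned int pkt    = 0xff1256 & markmask; /* packet mark, masked on lookup */

    	printf("match: %d\n", stored == pkt);	/* 1: both reduce to 0x1200 */
    	return 0;
    }
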
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
/* 1 Range as input support for IPv4 added */
/* 2 nomatch flag support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
/* 1 nomatch flag support added */
/* 2 /0 support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0 /* Comments support added */
+/* 0 Comments support added */
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
#define E(a, b, c, d) \
{.ip6 = { \
- __constant_htonl(a), __constant_htonl(b), \
- __constant_htonl(c), __constant_htonl(d), \
+ htonl(a), htonl(b), \
+ htonl(c), htonl(d), \
} }
/*
}
-static const struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+static const struct genl_ops ip_vs_genl_ops[] = {
{
.cmd = IPVS_CMD_NEW_SERVICE,
.flags = GENL_ADMIN_PERM,
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
unsigned long now = jiffies;
int i, j;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
tbl->rover = j;
out:
- mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
}
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
/*
* IPVS LBLC Scheduler structure
*/
-static struct ip_vs_scheduler ip_vs_lblc_scheduler =
-{
+static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
.name = "lblc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
-DEFINE_SPINLOCK(nf_conntrack_lock);
-EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+EXPORT_SYMBOL_GPL(nf_conntrack_locks);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+
+static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ spin_unlock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_unlock(&nf_conntrack_locks[h2]);
+}
+
+/* return true if we need to recompute hashes (in case hash table was resized) */
+static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
+ unsigned int h2, unsigned int sequence)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ if (h1 <= h2) {
+ spin_lock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_lock_nested(&nf_conntrack_locks[h2],
+ SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&nf_conntrack_locks[h2]);
+ spin_lock_nested(&nf_conntrack_locks[h1],
+ SINGLE_DEPTH_NESTING);
+ }
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ nf_conntrack_double_unlock(h1, h2);
+ return true;
+ }
+ return false;
+}
+
+static void nf_conntrack_all_lock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_nested(&nf_conntrack_locks[i], i);
+}
+
+static void nf_conntrack_all_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_unlock(&nf_conntrack_locks[i]);
+}
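
The global nf_conntrack_lock becomes an array of CONNTRACK_LOCKS spinlocks, and whenever two bucket locks are needed they are taken in ascending array order, so two CPUs locking the same pair with opposite argument order cannot ABBA-deadlock. A userspace sketch of the ordering rule (pthread mutexes instead of spinlocks; the kernel additionally uses spin_lock_nested() to keep lockdep quiet):

    #include <pthread.h>
    #include <stdio.h>

    #define NLOCKS 8
    static pthread_mutex_t locks[NLOCKS];

    static void double_lock(unsigned int h1, unsigned int h2)
    {
    	h1 %= NLOCKS;
    	h2 %= NLOCKS;
    	if (h1 <= h2) {
    		pthread_mutex_lock(&locks[h1]);
    		if (h1 != h2)
    			pthread_mutex_lock(&locks[h2]);
    	} else {
    		pthread_mutex_lock(&locks[h2]);
    		pthread_mutex_lock(&locks[h1]);
    	}
    }

    static void double_unlock(unsigned int h1, unsigned int h2)
    {
    	h1 %= NLOCKS;
    	h2 %= NLOCKS;
    	pthread_mutex_unlock(&locks[h1]);
    	if (h1 != h2)
    		pthread_mutex_unlock(&locks[h2]);
    }

    int main(void)
    {
    	int i;

    	for (i = 0; i < NLOCKS; i++)
    		pthread_mutex_init(&locks[i], NULL);

    	double_lock(13, 6);	/* takes locks 5 then 6, same as double_lock(6, 13) would */
    	double_unlock(13, 6);
    	puts("ok");
    	return 0;
    }
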
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
nf_ct_remove_expectations(ct);
}
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_dying_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) dying list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->dying);
+ spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) unconfirmed list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->unconfirmed);
+ spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* We overload the first tuple to link into the unconfirmed or dying list. */
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ spin_unlock(&pcpu->lock);
+}
+
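
These helpers replace the global unconfirmed and dying lists with per-cpu lists under per-cpu locks; each conntrack records the cpu whose list it joined (ct->cpu), so later removal contends only on that one cpu's lock. A minimal userspace model of the linkage (locks and RCU elided; the pprev pointer mirrors the kernel's hlist trick of unlinking without knowing which list head an entry is on):

    #include <stdio.h>

    #define NCPU 4

    struct entry {
    	struct entry *next, **pprev;	/* intrusive list linkage */
    	int cpu;			/* which per-cpu list we are on */
    };

    struct pcpu { struct entry *unconfirmed; /* plus a per-cpu lock */ };

    static struct pcpu pcpu[NCPU];

    static void add_to_unconfirmed(struct entry *e, int this_cpu)
    {
    	e->cpu = this_cpu;		/* remember the list for removal */
    	e->next = pcpu[this_cpu].unconfirmed;
    	if (e->next)
    		e->next->pprev = &e->next;
    	e->pprev = &pcpu[this_cpu].unconfirmed;
    	pcpu[this_cpu].unconfirmed = e;
    }

    static void del_from_unconfirmed(struct entry *e)
    {
    	/* only pcpu[e->cpu]'s lock would be needed, not a global one */
    	*e->pprev = e->next;
    	if (e->next)
    		e->next->pprev = e->pprev;
    }

    int main(void)
    {
    	struct entry a = { 0 }, b = { 0 };

    	add_to_unconfirmed(&a, 1);
    	add_to_unconfirmed(&b, 1);
    	del_from_unconfirmed(&a);
    	printf("cpu1 head is b: %d\n", pcpu[1].unconfirmed == &b);
    	return 0;
    }
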
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to nf_conntrack_lock!!! -HW */
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto && l4proto->destroy)
rcu_read_unlock();
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
- * too. */
+ * too.
+ */
nf_ct_remove_expectations(ct);
- /* We overload first tuple to link into unconfirmed or dying list.*/
- BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
NF_CT_STAT_INC(net, delete);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (ct->master)
nf_ct_put(ct->master);
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ unsigned int hash, reply_hash;
+ u16 zone = nf_ct_zone(ct);
+ unsigned int sequence;
nf_ct_helper_destroy(ct);
- spin_lock_bh(&nf_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- NF_CT_STAT_INC(net, delete_list);
+
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
clean_from_lists(ct);
- /* add this conntrack to the dying list */
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.dying);
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+
+ nf_ct_add_to_dying_list(ct);
+
+ NF_CT_STAT_INC(net, delete_list);
+ local_bh_enable();
}
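
nf_ct_delete_from_lists (and the insert/confirm paths below) now computes the two bucket hashes without any lock, takes the bucket locks, and re-reads net->ct.generation: if a resize slipped in between, the stale hashes are thrown away and recomputed. A stripped-down model of that optimistic retry loop (userspace C; a plain atomic counter stands in for the kernel's seqcount):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint generation;		/* bumped around a table resize */
    static unsigned int htable_size = 16;

    static unsigned int hash_tuple(unsigned int tuple)
    {
    	return tuple % htable_size;	/* stand-in for hash_conntrack() */
    }

    int main(void)
    {
    	unsigned int seq, hash;

    	do {
    		seq = atomic_load(&generation);
    		hash = hash_tuple(12345);
    		/* ...take the bucket lock(s) for hash here... */
    	} while (atomic_load(&generation) != seq);	/* resized: redo */

    	printf("bucket %u\n", hash);
    	return 0;
    }
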
static void death_by_event(unsigned long ul_conntrack)
* Warning :
* - Caller must take a reference on returned object
* and recheck nf_ct_tuple_equal(tuple, &h->tuple)
- * OR
- * - Caller must lock nf_conntrack_lock before calling this function
*/
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, u16 zone,
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int hash,
- unsigned int repl_hash)
+ unsigned int reply_hash)
{
struct net *net = nf_ct_net(ct);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.hash[hash]);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
- &net->ct.hash[repl_hash]);
+ &net->ct.hash[reply_hash]);
}
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
u16 zone;
+ unsigned int sequence;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
-
+ local_bh_enable();
return 0;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
/* deletion from this larval template list happens via nf_ct_put() */
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
{
+ struct ct_pcpu *pcpu;
+
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
nf_conntrack_get(&tmpl->ct_general);
- spin_lock_bh(&nf_conntrack_lock);
+ /* add this conntrack to the (per cpu) tmpl list */
+ local_bh_disable();
+ tmpl->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
+
+ spin_lock(&pcpu->lock);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.tmpl);
- spin_unlock_bh(&nf_conntrack_lock);
+ &pcpu->tmpl);
+ spin_unlock_bh(&pcpu->lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
struct net *net;
u16 zone;
+ unsigned int sequence;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- /* reuse the hash saved before */
- hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
- hash = hash_bucket(hash, net);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ local_bh_disable();
+
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ /* reuse the hash saved before */
+ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+ hash = hash_bucket(hash, net);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
+ * connections for unconfirmed conns. But packet copies and
+ * REJECT will give spurious warnings here.
+ */
/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
- confirmed us. */
+ * confirmed us.
+ */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
-
- spin_lock_bh(&nf_conntrack_lock);
-
/* We have to check the DYING flag inside the lock to prevent
a race against nf_ct_get_next_corpse() possibly called from
user context, else we insert an already 'dead' hash, blocking
further use of that particular connection -JM */
if (unlikely(nf_ct_is_dying(ct))) {
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
return NF_ACCEPT;
}
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- /* Remove from unconfirmed list */
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
* guarantee that no other CPU can find the conntrack before the above
* stores are visible.
*/
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
help = nfct_help(ct);
if (help && help->helper)
return NF_ACCEPT;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int hash)
+static noinline int early_drop(struct net *net, unsigned int _hash)
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct = NULL, *tmp;
struct hlist_nulls_node *n;
- unsigned int i, cnt = 0;
+ unsigned int i = 0, cnt = 0;
int dropped = 0;
+ unsigned int hash, sequence;
+ spinlock_t *lockp;
- rcu_read_lock();
- for (i = 0; i < net->ct.htable_size; i++) {
+ local_bh_disable();
+restart:
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_bucket(_hash, net);
+ for (; i < net->ct.htable_size; i++) {
+ lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ spin_unlock(lockp);
+ goto restart;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
+ !nf_ct_is_dying(tmp) &&
+ atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
+ break;
+ }
cnt++;
}
- if (ct != NULL) {
- if (likely(!nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)))
- break;
- else
- ct = NULL;
- }
+ hash = (hash + 1) % net->ct.htable_size;
+ spin_unlock(lockp);
- if (cnt >= NF_CT_EVICTION_RANGE)
+ if (ct || cnt >= NF_CT_EVICTION_RANGE)
break;
- hash = (hash + 1) % net->ct.htable_size;
}
- rcu_read_unlock();
+ local_bh_enable();
if (!ct)
return dropped;
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- if (!early_drop(net, hash_bucket(hash, net))) {
+ if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_ecache *ecache;
- struct nf_conntrack_expect *exp;
+ struct nf_conntrack_expect *exp = NULL;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
ecache ? ecache->expmask : 0,
GFP_ATOMIC);
- spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
- if (exp) {
- pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
- ct, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &ct->status);
- ct->master = exp->master;
- if (exp->helper) {
- help = nf_ct_helper_ext_add(ct, exp->helper,
- GFP_ATOMIC);
- if (help)
- rcu_assign_pointer(help->helper, exp->helper);
- }
+ local_bh_disable();
+ if (net->ct.expect_count) {
+ spin_lock(&nf_conntrack_expect_lock);
+ exp = nf_ct_find_expectation(net, zone, tuple);
+ if (exp) {
+ pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+ ct, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
+ ct->master = exp->master;
+ if (exp->helper) {
+ help = nf_ct_helper_ext_add(ct, exp->helper,
+ GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, exp->helper);
+ }
#ifdef CONFIG_NF_CONNTRACK_MARK
- ct->mark = exp->master->mark;
+ ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- ct->secmark = exp->master->secmark;
+ ct->secmark = exp->master->secmark;
#endif
- nf_conntrack_get(&ct->master->ct_general);
- NF_CT_STAT_INC(net, expect_new);
- } else {
+ NF_CT_STAT_INC(net, expect_new);
+ }
+ spin_unlock(&nf_conntrack_expect_lock);
+ }
+ if (!exp) {
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
/* Now it is inserted into the unconfirmed list, bump refcount */
nf_conntrack_get(&ct->ct_general);
+ nf_ct_add_to_unconfirmed_list(ct);
- /* Overload tuple linked list to put us in unconfirmed list. */
- hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.unconfirmed);
-
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (exp) {
if (exp->expectfn)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
+ spinlock_t *lockp;
- spin_lock_bh(&nf_conntrack_lock);
for (; *bucket < net->ct.htable_size; (*bucket)++) {
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
+ lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
+ local_bh_disable();
+ spin_lock(lockp);
+ if (*bucket < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ spin_unlock(lockp);
+ local_bh_enable();
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
- goto found;
+ set_bit(IPS_DYING_BIT, &ct->status);
}
+ spin_unlock_bh(&pcpu->lock);
}
- hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- spin_unlock_bh(&nf_conntrack_lock);
return NULL;
found:
atomic_inc(&ct->ct_general.use);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock(lockp);
+ local_bh_enable();
return ct;
}
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
- spin_lock_bh(&nf_conntrack_lock);
- hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- /* never fails to remove them, no listeners at this point */
- nf_ct_kill(ct);
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* never fails to remove them, no listeners at this point */
+ nf_ct_kill(ct);
+ }
+ spin_unlock_bh(&pcpu->lock);
}
- spin_unlock_bh(&nf_conntrack_lock);
}
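
/* For reference, not part of the patch: sketches of the per-cpu list helpers
 * the hunks above rely on, assuming the ct->cpu field and net->ct.pcpu_lists
 * introduced by this series. The conntrack records the CPU whose list it was
 * added on, so deletion finds the right lock even if the packet was handled
 * on another CPU.
 */
static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) unconfirmed list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->unconfirmed);
	spin_unlock(&pcpu->lock);
}

static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* the ORIGINAL-direction tuplehash node is overloaded as list link */
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&pcpu->lock);
}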
static int untrack_refs(void)
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname);
free_percpu(net->ct.stat);
+ free_percpu(net->ct.pcpu_lists);
}
}
if (!hash)
return -ENOMEM;
+ local_bh_disable();
+ nf_conntrack_all_lock();
+ write_seqcount_begin(&init_net.ct.generation);
+
/* Lookups in the old hash might happen in parallel, which means we
* might get false negatives during connection lookup. New connections
* created because of a false negative won't make it into the hash
- * though since that required taking the lock.
+ * though since that required taking the locks.
*/
- spin_lock_bh(&nf_conntrack_lock);
+
for (i = 0; i < init_net.ct.htable_size; i++) {
while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
h = hlist_nulls_entry(init_net.ct.hash[i].first,
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
init_net.ct.hash = hash;
- spin_unlock_bh(&nf_conntrack_lock);
+
+ write_seqcount_end(&init_net.ct.generation);
+ nf_conntrack_all_unlock();
+ local_bh_enable();
nf_ct_free_hashtable(old_hash, old_size);
return 0;
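
/* For reference, not part of the patch: the resize side must exclude every
 * bucket-lock holder, so the all-lock helpers assumed above take the whole
 * nf_conntrack_locks[] array in index order (the index doubling as the
 * lockdep nesting level). A sketch:
 */
static void nf_conntrack_all_lock(void)
{
	int i;

	for (i = 0; i < CONNTRACK_LOCKS; i++)
		spin_lock_nested(&nf_conntrack_locks[i], i);
}

static void nf_conntrack_all_unlock(void)
{
	int i;

	for (i = 0; i < CONNTRACK_LOCKS; i++)
		spin_unlock(&nf_conntrack_locks[i]);
}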
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int ret, cpu;
+ int i, ret, cpu;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_conntrack_locks[i]);
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 512 buckets. >= 1GB machines have 16384 buckets. */
int nf_conntrack_init_net(struct net *net)
{
- int ret;
+ int ret = -ENOMEM;
+ int cpu;
atomic_set(&net->ct.count, 0);
- INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
- net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
- if (!net->ct.stat) {
- ret = -ENOMEM;
+
+ net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
+ if (!net->ct.pcpu_lists)
goto err_stat;
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_init(&pcpu->lock);
+ INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
}
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ goto err_pcpu_lists;
+
net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname) {
- ret = -ENOMEM;
+ if (!net->ct.slabname)
goto err_slabname;
- }
net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
sizeof(struct nf_conn), 0,
SLAB_DESTROY_BY_RCU, NULL);
if (!net->ct.nf_conntrack_cachep) {
printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- ret = -ENOMEM;
goto err_cache;
}
net->ct.htable_size = nf_conntrack_htable_size;
net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
- ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
goto err_hash;
}
kfree(net->ct.slabname);
err_slabname:
free_percpu(net->ct.stat);
+err_pcpu_lists:
+ free_percpu(net->ct.pcpu_lists);
err_stat:
return ret;
}
{
struct nf_conntrack_expect *exp = (void *)ul_expect;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
nf_ct_unlink_expect(exp);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_put(exp);
}
if (!nf_ct_is_confirmed(exp->master))
return NULL;
+	/* Avoid a race with other CPUs that, for this exp->master ct, are
+	 * about to invoke ->destroy(), or nf_ct_delete() via timeout
+	 * or early_drop().
+	 *
+	 * The atomic_inc_not_zero() check tells us: if it fails, we
+	 * know that the ct is being destroyed.  If it succeeds, we
+	 * can be sure the ct cannot disappear underneath us.
+	 */
+ if (unlikely(nf_ct_is_dying(exp->master) ||
+ !atomic_inc_not_zero(&exp->master->ct_general.use)))
+ return NULL;
+
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
atomic_inc(&exp->use);
return exp;
nf_ct_unlink_expect(exp);
return exp;
}
+ /* Undo exp->master refcnt increase, if del_timer() failed */
+ nf_ct_put(exp->master);
return NULL;
}
if (!help)
return;
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ;
}
/* Will be over limit? */
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
if (p->max_expected &&
return ret;
}
-int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
u32 portid, int report)
{
int ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = __nf_ct_expect_check(expect);
if (ret <= 0)
goto out;
ret = nf_ct_expect_insert(expect);
if (ret < 0)
goto out;
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret;
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
info->sig_port[!dir]);
if (exp) {
nf_ct_dump_tuple(&exp->tuple);
set_expect_timeout(exp, info->timeout);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
+/* the appropriate ct lock must be held by the caller */
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- ) == me) {
+ if (help && rcu_dereference_raw(help->helper) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL);
}
void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_del_rcu(&n->head);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
unsigned int i;
+ int cpu;
/* Get rid of expectations */
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected(
help->helper,
- lockdep_is_held(&nf_conntrack_lock)
+ lockdep_is_held(&nf_conntrack_expect_lock)
) == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
}
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* Get rid of expecteds, set helpers to NULL. */
- hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
- unhelp(h, me);
- for (i = 0; i < net->ct.htable_size; i++) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
unhelp(h, me);
+ spin_unlock_bh(&pcpu->lock);
+ }
+ local_bh_disable();
+ for (i = 0; i < net->ct.htable_size; i++) {
+ spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
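+		/* a resize may have shrunk the table while we waited for
+		 * the lock, so re-check the bound under it
+		 */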
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ unhelp(h, me);
+ }
+ spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
}
+ local_bh_enable();
}
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
synchronize_rcu();
rtnl_lock();
- spin_lock_bh(&nf_conntrack_lock);
for_each_net(net)
__nf_conntrack_helper_unregister(me, net);
- spin_unlock_bh(&nf_conntrack_lock);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ spinlock_t *lockp;
+
#ifdef CONFIG_NF_CONNTRACK_MARK
const struct ctnetlink_dump_filter *filter = cb->data;
#endif
- spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
+
+ local_bh_disable();
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
+ lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (cb->args[0] >= net->ct.htable_size) {
+ spin_unlock(lockp);
+ goto out;
+ }
hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
+ spin_unlock(lockp);
goto out;
}
}
+ spin_unlock(lockp);
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (last)
nf_ct_put(last);
return 0;
}
-#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
[CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
[CTA_ZONE] = { .type = NLA_U16 },
[CTA_MARK_MASK] = { .type = NLA_U32 },
[CTA_LABELS] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
};
static int
}
static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
- struct hlist_nulls_head *list)
+ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
- struct nf_conn *ct, *last;
+ struct nf_conn *ct, *last = NULL;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ int cpu;
+ struct hlist_nulls_head *list;
+ struct net *net = sock_net(skb->sk);
if (cb->args[2])
return 0;
- spin_lock_bh(&nf_conntrack_lock);
- last = (struct nf_conn *)cb->args[1];
-restart:
- hlist_nulls_for_each_entry(h, n, list, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ struct ct_pcpu *pcpu;
+
+ if (!cpu_possible(cpu))
continue;
- if (cb->args[1]) {
- if (ct != last)
+
+ pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ spin_lock_bh(&pcpu->lock);
+ last = (struct nf_conn *)cb->args[1];
+ list = dying ? &pcpu->dying : &pcpu->unconfirmed;
+restart:
+ hlist_nulls_for_each_entry(h, n, list, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
- cb->args[1] = 0;
- }
- rcu_read_lock();
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
- if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
- goto out;
+ if (cb->args[1]) {
+ if (ct != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ rcu_read_lock();
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct);
+ rcu_read_unlock();
+ if (res < 0) {
+ nf_conntrack_get(&ct->ct_general);
+ cb->args[1] = (unsigned long)ct;
+ spin_unlock_bh(&pcpu->lock);
+ goto out;
+ }
}
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ } else
+ cb->args[2] = 1;
+ spin_unlock_bh(&pcpu->lock);
}
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- } else
- cb->args[2] = 1;
out:
- spin_unlock_bh(&nf_conntrack_lock);
if (last)
nf_ct_put(last);
static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.dying);
+ return ctnetlink_dump_list(skb, cb, true);
}
static int
static int
ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
+ return ctnetlink_dump_list(skb, cb, false);
}
static int
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
return -EOPNOTSUPP;
}
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper)
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* have to put what we 'get' above.
* after this line usage count == 0 */
nf_ct_expect_put(exp);
struct nf_conn_help *m_help;
/* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
} else {
/* This basically means we have to flush everything */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
if (err < 0)
return err;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = __nf_ct_expect_find(net, zone, &tuple);
if (!exp) {
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
err = ctnetlink_create_expect(net, zone, cda,
err = -EEXIST;
if (!(nlh->nlmsg_flags & NLM_F_EXCL))
err = ctnetlink_change_expect(exp, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return err;
}
struct hlist_node *next;
int found = 0;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (exp->class != SIP_EXPECT_SIGNALLING ||
!nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
found = 1;
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return found;
}
struct nf_conntrack_expect *exp;
struct hlist_node *next;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue;
if (!media)
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
if (chain->stats) {
- /* nfnl_lock is held, add some nfnl function for this, later */
struct nft_stats __percpu *oldstats =
- rcu_dereference_protected(chain->stats, 1);
+ nft_dereference(chain->stats);
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
return err;
}
-static void nf_tables_expr_destroy(struct nft_expr *expr)
+static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_expr *expr)
{
if (expr->ops->destroy)
- expr->ops->destroy(expr);
+ expr->ops->destroy(ctx, expr);
module_put(expr->ops->type->owner);
}
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
+ [NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
}
nla_nest_end(skb, list);
+ if (rule->ulen &&
+ nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
+ goto nla_put_failure;
+
return nlmsg_end(skb, nlh);
nla_put_failure:
return err;
}
-static void nf_tables_rule_destroy(struct nft_rule *rule)
+static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
{
struct nft_expr *expr;
*/
expr = nft_expr_first(rule);
while (expr->ops && expr != nft_expr_last(rule)) {
- nf_tables_expr_destroy(expr);
+ nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
kfree(rule);
static struct nft_expr_info *info;
static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_rule_trans *rupd;
if (rupd == NULL)
return NULL;
- rupd->chain = ctx->chain;
- rupd->table = ctx->table;
+ rupd->ctx = *ctx;
rupd->rule = rule;
- rupd->family = ctx->afi->family;
- rupd->nlh = ctx->nlh;
list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
return rupd;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
- unsigned int size, i, n;
+ unsigned int size, i, n, ulen = 0;
int err, rem;
bool create;
u64 handle, pos_handle;
}
}
+ if (nla[NFTA_RULE_USERDATA])
+ ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+
err = -ENOMEM;
- rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+ rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
if (rule == NULL)
goto err1;
rule->handle = handle;
rule->dlen = size;
+ rule->ulen = ulen;
+
+ if (ulen)
+ nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(old_rule, &ctx);
+ repl = nf_tables_trans_add(&ctx, old_rule);
if (repl == NULL) {
err = -ENOMEM;
goto err2;
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(rule, &ctx) == NULL) {
+ if (nf_tables_trans_add(&ctx, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
kfree(repl);
}
err2:
- nf_tables_rule_destroy(rule);
+ nf_tables_rule_destroy(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops != NULL)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(rule, ctx) == NULL)
+ if (nf_tables_trans_add(ctx, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
return 0;
*/
if (nft_rule_is_active(net, rupd->rule)) {
nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
- rupd->chain, rupd->rule,
- NFT_MSG_NEWRULE, 0,
- rupd->family);
+ nf_tables_rule_notify(skb, rupd->ctx.nlh,
+ rupd->ctx.table, rupd->ctx.chain,
+ rupd->rule, NFT_MSG_NEWRULE, 0,
+ rupd->ctx.afi->family);
list_del(&rupd->list);
kfree(rupd);
continue;
/* This rule is in the past, get rid of it */
list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+ nf_tables_rule_notify(skb, rupd->ctx.nlh,
+ rupd->ctx.table, rupd->ctx.chain,
rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->family);
+ rupd->ctx.afi->family);
}
/* Make sure we don't see any packet traversing old rules */
/* Now we can safely release unused old rules */
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(rupd->rule);
+ nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
list_del(&rupd->list);
kfree(rupd);
}
synchronize_rcu();
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(rupd->rule);
+ nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
list_del(&rupd->list);
kfree(rupd);
}
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
list_del(&set->list);
- if (!(set->flags & NFT_SET_ANONYMOUS))
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
set->ops->destroy(set);
module_put(set->ops->owner);
data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
+ default:
+ switch (data->verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* fall through */
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
data->chain = chain;
desc->len = sizeof(data);
break;
- default:
- return -EINVAL;
}
desc->type = NFT_DATA_VERDICT;
}
EXPORT_SYMBOL_GPL(nfnl_unlock);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_nfnl_is_held(u8 subsys_id)
+{
+ return lockdep_is_held(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
+#endif
+
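/* With lockdep_nfnl_is_held() exported, RCU-protected nfnetlink data can be
 * dereferenced under the subsystem mutex through a helper along these lines
 * (a sketch; the nft_dereference() used elsewhere in this series is such a
 * wrapper for the nftables subsystem):
 */
#define nfnl_dereference(p, ss)					\
	rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
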
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
nfnl_lock(n->subsys_id);
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
};
#define INSTANCE_BUCKETS 16
-static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly;
{
int status = -ENOMEM;
- /* it's not really all that important to have a random value, so
- * we can do this from the init function, even if there hasn't
- * been that much entropy yet */
- get_random_bytes(&hash_init, sizeof(hash_init));
-
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
}
static void
-nft_target_destroy(const struct nft_expr *expr)
+nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
}
static void
-nft_match_destroy(const struct nft_expr *expr)
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
- union{
+ union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
- uint8_t family;
};
static void nft_ct_get_eval(const struct nft_expr *expr,
goto err;
strncpy((char *)dest->data, helper->name, sizeof(dest->data));
return;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS: {
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+ unsigned int size;
+
+ if (!labels) {
+ memset(dest->data, 0, sizeof(dest->data));
+ return;
+ }
+
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
+ size = labels->words * sizeof(long);
+
+ memcpy(dest->data, labels->bits, size);
+ if (size < sizeof(dest->data))
+ memset(((char *) dest->data) + size, 0,
+ sizeof(dest->data) - size);
+ return;
+ }
+#endif
}
tuple = &ct->tuplehash[priv->dir].tuple;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
#endif
case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (err < 0)
return err;
- priv->family = ctx->afi->family;
-
return 0;
}
-static void nft_ct_destroy(const struct nft_expr *expr)
+static void nft_ct_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
- struct nft_ct *priv = nft_expr_priv(expr);
-
- nft_ct_l3proto_module_put(priv->family);
+ nft_ct_l3proto_module_put(ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#define NFT_HASH_MIN_SIZE 4
+
struct nft_hash {
- struct hlist_head *hash;
- unsigned int hsize;
+ struct nft_hash_table __rcu *tbl;
+};
+
+struct nft_hash_table {
+ unsigned int size;
+ unsigned int elements;
+ struct nft_hash_elem __rcu *buckets[];
};
struct nft_hash_elem {
- struct hlist_node hnode;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_hash_elem __rcu *next;
+ struct nft_data key;
+ struct nft_data data[];
};
+#define nft_hash_for_each_entry(i, head) \
+ for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
+#define nft_hash_for_each_entry_rcu(i, head) \
+ for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
+
static u32 nft_hash_rnd __read_mostly;
static bool nft_hash_rnd_initted __read_mostly;
unsigned int h;
h = jhash(data->data, len, nft_hash_rnd);
- return ((u64)h * hsize) >> 32;
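+	/* hsize is always a power of two here (allocated at
+	 * NFT_HASH_MIN_SIZE, then only doubled or halved), so the
+	 * mask is an exact modulo
+	 */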
+ return h & (hsize - 1);
}
static bool nft_hash_lookup(const struct nft_set *set,
struct nft_data *data)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
const struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
+ h = nft_hash_data(key, tbl->size, set->klen);
+ nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
if (nft_data_cmp(&he->key, key, set->klen))
continue;
if (set->flags & NFT_SET_MAP)
return false;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ if (is_vmalloc_addr(tbl))
+ vfree(tbl);
+ else
+ kfree(tbl);
+}
+
+static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
+{
+ struct nft_hash_table *tbl;
+ size_t size;
+
+ size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
+ tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
+ if (tbl == NULL)
+ tbl = vzalloc(size);
+ if (tbl == NULL)
+ return NULL;
+ tbl->size = nbuckets;
+
+ return tbl;
+}
+
+static void nft_hash_chain_unzip(const struct nft_set *set,
+ const struct nft_hash_table *ntbl,
+ struct nft_hash_table *tbl, unsigned int n)
+{
+ struct nft_hash_elem *he, *last, *next;
+ unsigned int h;
+
+ he = nft_dereference(tbl->buckets[n]);
+ if (he == NULL)
+ return;
+ h = nft_hash_data(&he->key, ntbl->size, set->klen);
+
+ /* Find last element of first chain hashing to bucket h */
+ last = he;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ break;
+ last = he;
+ }
+
+ /* Unlink first chain from the old table */
+ RCU_INIT_POINTER(tbl->buckets[n], last->next);
+
+ /* If end of chain reached, done */
+ if (he == NULL)
+ return;
+
+ /* Find first element of second chain hashing to bucket h */
+ next = NULL;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ continue;
+ next = he;
+ break;
+ }
+
+ /* Link the two chains */
+ RCU_INIT_POINTER(last->next, next);
+}
+
+static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem *he;
+ unsigned int i, h;
+ bool complete;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size * 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+	/* Link each new bucket to the first element in the old table
+	 * that hashes to it.
+	 */
+ for (i = 0; i < ntbl->size; i++) {
+ h = i < tbl->size ? i : i - tbl->size;
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
+ continue;
+ RCU_INIT_POINTER(ntbl->buckets[i], he);
+ break;
+ }
+ }
+ ntbl->elements = tbl->elements;
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+
+ /* Unzip interleaved hash chains */
+ do {
+ /* Wait for readers to use new table/unzipped chains */
+ synchronize_rcu();
+
+ complete = true;
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_chain_unzip(set, ntbl, tbl, i);
+ if (tbl->buckets[i] != NULL)
+ complete = false;
+ }
+ } while (!complete);
+
+ nft_hash_tbl_free(tbl);
+ return 0;
+}
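
/* The expand path publishes the new table before touching the old chains and
 * splits interleaved chains only after synchronize_rcu(), so concurrent RCU
 * readers always traverse a complete chain (the "unzip" approach known from
 * relativistic hash tables).
 */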
+
+static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem __rcu **pprev;
+ unsigned int i;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size / 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ntbl->size; i++) {
+ ntbl->buckets[i] = tbl->buckets[i];
+
+ for (pprev = &ntbl->buckets[i]; *pprev != NULL;
+ pprev = &nft_dereference(*pprev)->next)
+ ;
+ RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
+ }
+ ntbl->elements = tbl->elements;
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+ synchronize_rcu();
+
+ nft_hash_tbl_free(tbl);
+ return 0;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he;
unsigned int size, h;
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
- h = nft_hash_data(&he->key, priv->hsize, set->klen);
- hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
+ h = nft_hash_data(&he->key, tbl->size, set->klen);
+ RCU_INIT_POINTER(he->next, tbl->buckets[h]);
+ rcu_assign_pointer(tbl->buckets[h], he);
+ tbl->elements++;
+
+ /* Expand table when exceeding 75% load */
+ if (tbl->elements > tbl->size / 4 * 3)
+ nft_hash_tbl_expand(set, priv);
+
return 0;
}
+static void nft_hash_elem_destroy(const struct nft_set *set,
+ struct nft_hash_elem *he)
+{
+ nft_data_uninit(&he->key, NFT_DATA_VALUE);
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(he->data, set->dtype);
+ kfree(he);
+}
+
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = elem->cookie;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, __rcu **pprev;
- hlist_del_rcu(&he->hnode);
+ pprev = elem->cookie;
+ he = nft_dereference((*pprev));
+
+ RCU_INIT_POINTER(*pprev, he->next);
+ synchronize_rcu();
kfree(he);
+ tbl->elements--;
+
+ /* Shrink table beneath 30% load */
+ if (tbl->elements < tbl->size * 3 / 10 &&
+ tbl->size > NFT_HASH_MIN_SIZE)
+ nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem __rcu * const *pprev;
struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(&elem->key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
- if (nft_data_cmp(&he->key, &elem->key, set->klen))
+ h = nft_hash_data(&elem->key, tbl->size, set->klen);
+ pprev = &tbl->buckets[h];
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
+ pprev = &he->next;
continue;
+ }
- elem->cookie = he;
- elem->flags = 0;
+ elem->cookie = (void *)pprev;
+ elem->flags = 0;
if (set->flags & NFT_SET_MAP)
nft_data_copy(&elem->data, he->data);
return 0;
struct nft_set_iter *iter)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry(he, &priv->hash[i], hnode) {
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_for_each_entry(he, tbl->buckets[i]) {
if (iter->count < iter->skip)
goto cont;
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
- unsigned int cnt, i;
+ struct nft_hash_table *tbl;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- /* Aim for a load factor of 0.75 */
- // FIXME: temporarily broken until we have set descriptions
- cnt = 100;
- cnt = cnt * 4 / 3;
-
- priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
- if (priv->hash == NULL)
+ tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+ if (tbl == NULL)
return -ENOMEM;
- priv->hsize = cnt;
-
- for (i = 0; i < cnt; i++)
- INIT_HLIST_HEAD(&priv->hash[i]);
-
+ RCU_INIT_POINTER(priv->tbl, tbl);
return 0;
}
static void nft_hash_destroy(const struct nft_set *set)
{
const struct nft_hash *priv = nft_set_priv(set);
- const struct hlist_node *next;
- struct nft_hash_elem *elem;
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, *next;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
- hlist_del(&elem->hnode);
- nft_hash_elem_destroy(set, elem);
+ for (i = 0; i < tbl->size; i++) {
+ for (he = nft_dereference(tbl->buckets[i]); he != NULL;
+ he = next) {
+ next = nft_dereference(he->next);
+ nft_hash_elem_destroy(set, he);
}
}
- kfree(priv->hash);
+ kfree(tbl);
}
static struct nft_set_ops nft_hash_ops __read_mostly = {
return err;
}
-static void nft_immediate_destroy(const struct nft_expr *expr)
+static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
return 0;
}
-static void nft_log_destroy(const struct nft_expr *expr)
+static void nft_log_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_log *priv = nft_expr_priv(expr);
return 0;
}
-static void nft_lookup_destroy(const struct nft_expr *expr)
+static void nft_lookup_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(NULL, priv->set, &priv->binding);
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
enum nft_registers sreg_addr_max:8;
enum nft_registers sreg_proto_min:8;
enum nft_registers sreg_proto_max:8;
- int family;
- enum nf_nat_manip_type type;
+ enum nf_nat_manip_type type:8;
+ u8 family;
};
static void nft_nat_eval(const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ u32 family;
int err;
if (tb[NFTA_NAT_TYPE] == NULL)
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
- priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (priv->family != AF_INET && priv->family != AF_INET6)
- return -EINVAL;
+ family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+ if (family != AF_INET && family != AF_INET6)
+ return -EAFNOSUPPORT;
+ if (family != ctx->afi->family)
+ return -EOPNOTSUPP;
+ priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min = ntohl(nla_get_be32(
static int __init nft_nat_module_init(void)
{
- int err;
-
- err = nft_register_expr(&nft_nat_type);
- if (err < 0)
- return err;
-
- return 0;
+ return nft_register_expr(&nft_nat_type);
}
static void __exit nft_nat_module_exit(void)
if (par->family == NFPROTO_BRIDGE) {
switch (eth_hdr(skb)->h_proto) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
audit_ip4(ab, skb);
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
audit_ip6(ab, skb);
break;
}
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
+#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#define CONNLIMIT_SLOTS 32
+#define CONNLIMIT_LOCK_SLOTS 32
+#define CONNLIMIT_GC_MAX_NODES 8
+
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
struct hlist_node node;
union nf_inet_addr addr;
};
+struct xt_connlimit_rb {
+ struct rb_node node;
+ struct hlist_head hhead; /* connections/hosts in same subnet */
+ union nf_inet_addr addr; /* search key */
+};
+
struct xt_connlimit_data {
- struct hlist_head iphash[256];
- spinlock_t lock;
+ struct rb_root climit_root4[CONNLIMIT_SLOTS];
+ struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ spinlock_t locks[CONNLIMIT_LOCK_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
+static struct kmem_cache *connlimit_rb_cachep __read_mostly;
+static struct kmem_cache *connlimit_conn_cachep __read_mostly;
static inline unsigned int connlimit_iphash(__be32 addr)
{
- return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
+ return jhash_1word((__force __u32)addr,
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline unsigned int
for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
res.ip6[i] = addr->ip6[i] & mask->ip6[i];
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF;
+ return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline bool already_closed(const struct nf_conn *conn)
return 0;
}
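+/* Returns a memcmp()-style three-way result so the masked source
+ * network can be used as the rbtree search key.
+ */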
-static inline unsigned int
+static int
same_source_net(const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
const union nf_inet_addr *u3, u_int8_t family)
{
if (family == NFPROTO_IPV4) {
- return (addr->ip & mask->ip) == (u3->ip & mask->ip);
+ return ntohl(addr->ip & mask->ip) -
+ ntohl(u3->ip & mask->ip);
} else {
union nf_inet_addr lh, rh;
unsigned int i;
rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
}
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0;
+ return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
}
}
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
+static bool add_hlist(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- u_int8_t family)
+ const union nf_inet_addr *addr)
+{
+ struct xt_connlimit_conn *conn;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return false;
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ hlist_add_head(&conn->node, head);
+ return true;
+}
+
+static unsigned int check_hlist(struct net *net,
+ struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct xt_connlimit_conn *conn;
struct hlist_node *n;
struct nf_conn *found_ct;
- struct hlist_head *hash;
- bool addit = true;
- int matches = 0;
-
- if (family == NFPROTO_IPV6)
- hash = &data->iphash[connlimit_iphash6(addr, mask)];
- else
- hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
+ unsigned int length = 0;
+ *addit = true;
rcu_read_lock();
/* check the saved connections */
- hlist_for_each_entry_safe(conn, n, hash, node) {
+ hlist_for_each_entry_safe(conn, n, head, node) {
found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
&conn->tuple);
- found_ct = NULL;
+ if (found == NULL) {
+ hlist_del(&conn->node);
+ kmem_cache_free(connlimit_conn_cachep, conn);
+ continue;
+ }
- if (found != NULL)
- found_ct = nf_ct_tuplehash_to_ctrack(found);
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (found_ct != NULL &&
- nf_ct_tuple_equal(&conn->tuple, tuple) &&
- !already_closed(found_ct))
+ if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
*/
- addit = false;
-
- if (found == NULL) {
- /* this one is gone */
- hlist_del(&conn->node);
- kfree(conn);
- continue;
- }
-
- if (already_closed(found_ct)) {
+ *addit = false;
+ } else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
* closed already -> ditch it
*/
nf_ct_put(found_ct);
hlist_del(&conn->node);
- kfree(conn);
+ kmem_cache_free(connlimit_conn_cachep, conn);
continue;
}
- if (same_source_net(addr, mask, &conn->addr, family))
- /* same source network -> be counted! */
- ++matches;
nf_ct_put(found_ct);
+ length++;
}
rcu_read_unlock();
- if (addit) {
- /* save the new connection in our list */
- conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
- if (conn == NULL)
- return -ENOMEM;
- conn->tuple = *tuple;
- conn->addr = *addr;
- hlist_add_head(&conn->node, hash);
- ++matches;
+ return length;
+}
+
+static void tree_nodes_free(struct rb_root *root,
+ struct xt_connlimit_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct xt_connlimit_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ rb_erase(&rbconn->node, root);
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+static unsigned int
+count_tree(struct net *net, struct rb_root *root,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ u8 family)
+{
+ struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct xt_connlimit_rb *rbconn;
+ struct xt_connlimit_conn *conn;
+ unsigned int gc_count;
+ bool no_gc = false;
+
+ restart:
+ gc_count = 0;
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ bool addit;
+
+ rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);
+
+ parent = *rbnode;
+ diff = same_source_net(addr, mask, &rbconn->addr, family);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ unsigned int count;
+ count = check_hlist(net, &rbconn->hhead, tuple, &addit);
+
+ tree_nodes_free(root, gc_nodes, gc_count);
+ if (!addit)
+ return count;
+
+ if (!add_hlist(&rbconn->hhead, tuple, addr))
+ return 0; /* hotdrop */
+
+ return count + 1;
+ }
+
+ if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ /* only used for GC on hhead, retval and 'addit' ignored */
+ check_hlist(net, &rbconn->hhead, tuple, &addit);
+ if (hlist_empty(&rbconn->hhead))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ no_gc = true;
+ tree_nodes_free(root, gc_nodes, gc_count);
+		/* tree_nodes_free() before the new allocation permits the
+		 * allocator to re-use the newly freed objects.
+		 *
+		 * This is a rare event; in most cases we will find an
+		 * existing node to re-use (or gc_count is 0).
+		 */
+ goto restart;
+ }
+
+ /* no match, need to insert new node */
+ rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ return 0;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ return 0;
+ }
+
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ rbconn->addr = *addr;
+
+ INIT_HLIST_HEAD(&rbconn->hhead);
+ hlist_add_head(&conn->node, &rbconn->hhead);
+
+ rb_link_node(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ return 1;
+}
+
+static int count_them(struct net *net,
+ struct xt_connlimit_data *data,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr,
+ const union nf_inet_addr *mask,
+ u_int8_t family)
+{
+ struct rb_root *root;
+ int count;
+ u32 hash;
+
+ if (family == NFPROTO_IPV6) {
+ hash = connlimit_iphash6(addr, mask);
+ root = &data->climit_root6[hash];
+ } else {
+ hash = connlimit_iphash(addr->ip & mask->ip);
+ root = &data->climit_root4[hash];
}
- return matches;
+ spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ count = count_tree(net, root, tuple, addr, mask, family);
+
+ spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ return count;
}
static bool
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
- int connections;
+ unsigned int connections;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL)
iph->daddr : iph->saddr;
}
- spin_lock_bh(&info->data->lock);
connections = count_them(net, info->data, tuple_ptr, &addr,
&info->mask, par->family);
- spin_unlock_bh(&info->data->lock);
-
- if (connections < 0)
+ if (connections == 0)
/* allocation failed, drop it entirely */
goto hotdrop;
return -ENOMEM;
}
- spin_lock_init(&info->data->lock);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
- INIT_HLIST_HEAD(&info->data->iphash[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i)
+ spin_lock_init(&info->data->locks[i]);
+
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ info->data->climit_root4[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ info->data->climit_root6[i] = RB_ROOT;
return 0;
}
-static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+static void destroy_tree(struct rb_root *r)
{
- const struct xt_connlimit_info *info = par->matchinfo;
struct xt_connlimit_conn *conn;
+ struct xt_connlimit_rb *rbconn;
struct hlist_node *n;
- struct hlist_head *hash = info->data->iphash;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = container_of(node, struct xt_connlimit_rb, node);
+
+ rb_erase(node, r);
+
+ hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
+ kmem_cache_free(connlimit_conn_cachep, conn);
+
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_connlimit_info *info = par->matchinfo;
unsigned int i;
nf_ct_l3proto_module_put(par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
- hlist_for_each_entry_safe(conn, n, &hash[i], node) {
- hlist_del(&conn->node);
- kfree(conn);
- }
- }
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ destroy_tree(&info->data->climit_root4[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ destroy_tree(&info->data->climit_root6[i]);
kfree(info->data);
}
static int __init connlimit_mt_init(void)
{
- return xt_register_match(&connlimit_mt_reg);
+ int ret;
+
+ BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
+ BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
+
+ connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
+ sizeof(struct xt_connlimit_conn),
+ 0, 0, NULL);
+ if (!connlimit_conn_cachep)
+ return -ENOMEM;
+
+ connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
+ sizeof(struct xt_connlimit_rb),
+ 0, 0, NULL);
+ if (!connlimit_rb_cachep) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ return -ENOMEM;
+ }
+ ret = xt_register_match(&connlimit_mt_reg);
+ if (ret != 0) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
+ }
+ return ret;
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
- ntohl(chdr->cpi << 16),
+ ntohs(chdr->cpi),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI));
}
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
+#define HTSIZE 256
struct fw_head {
- struct fw_filter *ht[HTSIZE];
- u32 mask;
+ u32 mask;
+ struct fw_filter *ht[HTSIZE];
};
struct fw_filter {
struct tcf_exts exts;
};
-static inline int fw_hash(u32 handle)
+static u32 fw_hash(u32 handle)
{
- if (HTSIZE == 4096)
- return ((handle >> 24) & 0xFFF) ^
- ((handle >> 12) & 0xFFF) ^
- (handle & 0xFFF);
- else if (HTSIZE == 2048)
- return ((handle >> 22) & 0x7FF) ^
- ((handle >> 11) & 0x7FF) ^
- (handle & 0x7FF);
- else if (HTSIZE == 1024)
- return ((handle >> 20) & 0x3FF) ^
- ((handle >> 10) & 0x3FF) ^
- (handle & 0x3FF);
- else if (HTSIZE == 512)
- return (handle >> 27) ^
- ((handle >> 18) & 0x1FF) ^
- ((handle >> 9) & 0x1FF) ^
- (handle & 0x1FF);
- else if (HTSIZE == 256) {
- u8 *t = (u8 *) &handle;
- return t[0] ^ t[1] ^ t[2] ^ t[3];
- } else
- return handle & (HTSIZE - 1);
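+	/* fold all four bytes into the low byte; with HTSIZE == 256 the
+	 * modulo below keeps exactly that byte, matching the old
+	 * t[0] ^ t[1] ^ t[2] ^ t[3] behaviour
+	 */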
+ handle ^= (handle >> 16);
+ handle ^= (handle >> 8);
+ return handle % HTSIZE;
}
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static struct kmem_cache *secpath_cachep __read_mostly;
+static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
+static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];
+
+int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
+ err = -ENOBUFS;
+ else
+ rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_register_afinfo);
+
+int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
+ err = -EINVAL;
+ else
+ RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
+ }
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ synchronize_rcu();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_unregister_afinfo);
+
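+/* On success the RCU read side is left locked; xfrm_input_put_afinfo()
+ * drops it. On failure it is released here before returning NULL.
+ */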
+static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
+{
+ struct xfrm_input_afinfo *afinfo;
+
+ if (unlikely(family >= NPROTO))
+ return NULL;
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_input_afinfo[family]);
+ if (unlikely(!afinfo))
+ rcu_read_unlock();
+ return afinfo;
+}
+
+static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ rcu_read_unlock();
+}
+
+static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
+ int err)
+{
+ int ret;
+ struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);
+
+ if (!afinfo)
+ return -EAFNOSUPPORT;
+
+ ret = afinfo->callback(skb, protocol, err);
+ xfrm_input_put_afinfo(afinfo);
+
+ return ret;
+}
+
void __secpath_destroy(struct sec_path *sp)
{
int i;
EXPORT_SYMBOL(xfrm_alloc_spi);
static bool __xfrm_state_filter_match(struct xfrm_state *x,
- struct xfrm_filter *filter)
+ struct xfrm_address_filter *filter)
{
if (filter) {
if ((filter->family == AF_INET ||
EXPORT_SYMBOL(xfrm_state_walk);
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
- struct xfrm_filter *filter)
+ struct xfrm_address_filter *filter)
{
INIT_LIST_HEAD(&walk->all);
walk->proto = proto;
if (!cb->args[0]) {
struct nlattr *attrs[XFRMA_MAX+1];
- struct xfrm_filter *filter = NULL;
+ struct xfrm_address_filter *filter = NULL;
u8 proto = 0;
int err;
if (err < 0)
return err;
- if (attrs[XFRMA_FILTER]) {
+ if (attrs[XFRMA_ADDRESS_FILTER]) {
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
- memcpy(filter, nla_data(attrs[XFRMA_FILTER]),
+ memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]),
sizeof(*filter));
}
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
[XFRMA_PROTO] = { .type = NLA_U8 },
- [XFRMA_FILTER] = { .len = sizeof(struct xfrm_filter) },
+ [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
};
static const struct xfrm_link {