================
AF_RDS, PF_RDS, SOL_RDS
- These constants haven't been assigned yet, because RDS isn't in
- mainline yet. Currently, the kernel module assigns some constant
- and publishes it to user space through two sysctl files
- /proc/sys/net/rds/pf_rds
- /proc/sys/net/rds/sol_rds
+ AF_RDS and PF_RDS are the domain types to be used with socket(2)
+ to create RDS sockets. SOL_RDS is the socket level to be used
+ with setsockopt(2) and getsockopt(2) for RDS-specific socket
+ options.
fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
This creates a new, unbound RDS socket.
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
ctx->more = 0;
- while (iov_iter_count(&msg->msg_iter)) {
- int len = iov_iter_count(&msg->msg_iter);
+ while (msg_data_left(msg)) {
+ int len = msg_data_left(msg);
if (len > limit)
len = limit;
atomic_dec(&ctx->inflight);
skcipher_free_async_sgls(sreq);
kfree(req);
- aio_complete(iocb, err, err);
+ iocb->ki_complete(iocb, err, err);
}
static inline int skcipher_sndbuf(struct sock *sk)
long copied = 0;
lock_sock(sk);
- while (iov_iter_count(&msg->msg_iter)) {
+ while (msg_data_left(msg)) {
sgl = list_first_entry(&ctx->tsgl,
struct skcipher_sg_list, list);
sg = sgl->sg;
goto unlock;
}
- used = min_t(unsigned long, ctx->used, iov_iter_count(&msg->msg_iter));
+ used = min_t(unsigned long, ctx->used, msg_data_left(msg));
used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
err = used;
#include <linux/pfn.h>
#include <linux/export.h>
#include <linux/io.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/types.h> /* size_t */
#include <linux/proc_fs.h>
#include <linux/fcntl.h> /* O_ACCMODE */
-#include <linux/aio.h>
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/aio.h>
#include <linux/jiffies.h>
#include <linux/cpu.h>
#include <asm/pgtable.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <linux/ioctl.h>
#include <linux/cdev.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/ioctl.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/ioctl.h>
struct bgmac_dma_desc *dma_desc;
u32 ctl1;
- if (i == ring->num_slots - 1)
+ if (i == BGMAC_TX_RING_SLOTS - 1)
ctl0 |= BGMAC_DESC_CTL0_EOT;
ctl1 = len & BGMAC_DESC_CTL1_LEN;
{
struct device *dma_dev = bgmac->core->dma_dev;
struct net_device *net_dev = bgmac->net_dev;
- struct bgmac_slot_info *slot = &ring->slots[ring->end];
- int free_slots;
+ int index = ring->end % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[index];
int nr_frags;
u32 flags;
- int index = ring->end;
int i;
if (skb->len > BGMAC_DESC_CTL1_LEN) {
nr_frags = skb_shinfo(skb)->nr_frags;
- if (ring->start <= ring->end)
- free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
- else
- free_slots = ring->start - ring->end;
-
- if (free_slots <= nr_frags + 1) {
+ /* ring->end - ring->start will return the number of valid slots,
+ * even when ring->end overflows
+ */
+ if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
netif_stop_queue(net_dev);
return NETDEV_TX_BUSY;
}
slot->skb = skb;
-
+ ring->end += nr_frags + 1;
netdev_sent_queue(net_dev, skb->len);
wmb();
/* Increase ring->end to point empty slot. We tell hardware the first
* slot it should *not* read.
*/
- ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
ring->index_base +
- ring->end * sizeof(struct bgmac_dma_desc));
+ (ring->end % BGMAC_TX_RING_SLOTS) *
+ sizeof(struct bgmac_dma_desc));
- free_slots -= nr_frags + 1;
- if (free_slots < 8)
+ if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
netif_stop_queue(net_dev);
return NETDEV_TX_OK;
empty_slot &= BGMAC_DMA_TX_STATDPTR;
empty_slot /= sizeof(struct bgmac_dma_desc);
- while (ring->start != empty_slot) {
- struct bgmac_slot_info *slot = &ring->slots[ring->start];
- u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
- int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+ while (ring->start != ring->end) {
+ int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[slot_idx];
+ u32 ctl1;
+ int len;
- if (!slot->dma_addr) {
- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
- ring->start, ring->end);
- goto next;
- }
+ if (slot_idx == empty_slot)
+ break;
+ ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
+ len = ctl1 & BGMAC_DESC_CTL1_LEN;
if (ctl1 & BGMAC_DESC_CTL0_SOF)
/* Unmap no longer used buffer */
dma_unmap_single(dma_dev, slot->dma_addr, len,
slot->skb = NULL;
}
-next:
slot->dma_addr = 0;
- if (++ring->start >= BGMAC_TX_RING_SLOTS)
- ring->start = 0;
+ ring->start++;
freed = true;
}
return -ENOMEM;
/* Poison - if everything goes fine, hardware will overwrite it */
- rx = buf;
+ rx = buf + BGMAC_RX_BUF_OFFSET;
rx->len = cpu_to_le16(0xdead);
rx->flags = cpu_to_le16(0xbeef);
/* Map skb for the DMA */
- dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
- DMA_FROM_DEVICE);
+ dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
+ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dma_dev, dma_addr)) {
bgmac_err(bgmac, "DMA mapping error\n");
put_page(virt_to_head_page(buf));
return 0;
}
+static void bgmac_dma_rx_update_index(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring)
+{
+ dma_wmb();
+
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+ ring->index_base +
+ ring->end * sizeof(struct bgmac_dma_desc));
+}
+
static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
struct bgmac_dma_ring *ring, int desc_idx)
{
struct bgmac_dma_desc *dma_desc = ring->cpu_base + desc_idx;
u32 ctl0 = 0, ctl1 = 0;
- if (desc_idx == ring->num_slots - 1)
+ if (desc_idx == BGMAC_RX_RING_SLOTS - 1)
ctl0 |= BGMAC_DESC_CTL0_EOT;
ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN;
/* Is there any BGMAC device that requires extension? */
dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
dma_desc->ctl0 = cpu_to_le32(ctl0);
dma_desc->ctl1 = cpu_to_le32(ctl1);
+
+ ring->end = desc_idx;
+}
+
+static void bgmac_dma_rx_poison_buf(struct device *dma_dev,
+ struct bgmac_slot_info *slot)
+{
+ struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
+
+ dma_sync_single_for_cpu(dma_dev, slot->dma_addr, BGMAC_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ rx->len = cpu_to_le16(0xdead);
+ rx->flags = cpu_to_le16(0xbeef);
+ dma_sync_single_for_device(dma_dev, slot->dma_addr, BGMAC_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
}
static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
end_slot &= BGMAC_DMA_RX_STATDPTR;
end_slot /= sizeof(struct bgmac_dma_desc);
- ring->end = end_slot;
-
- while (ring->start != ring->end) {
+ while (ring->start != end_slot) {
struct device *dma_dev = bgmac->core->dma_dev;
struct bgmac_slot_info *slot = &ring->slots[ring->start];
- struct bgmac_rx_header *rx = slot->buf;
+ struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
struct sk_buff *skb;
void *buf = slot->buf;
+ dma_addr_t dma_addr = slot->dma_addr;
u16 len, flags;
- /* Unmap buffer to make it accessible to the CPU */
- dma_sync_single_for_cpu(dma_dev, slot->dma_addr,
- BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ do {
+ /* Prepare new skb as replacement */
+ if (bgmac_dma_rx_skb_for_slot(bgmac, slot)) {
+ bgmac_dma_rx_poison_buf(dma_dev, slot);
+ break;
+ }
- /* Get info from the header */
- len = le16_to_cpu(rx->len);
- flags = le16_to_cpu(rx->flags);
+ /* Unmap buffer to make it accessible to the CPU */
+ dma_unmap_single(dma_dev, dma_addr,
+ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
- do {
- dma_addr_t old_dma_addr = slot->dma_addr;
- int err;
+ /* Get info from the header */
+ len = le16_to_cpu(rx->len);
+ flags = le16_to_cpu(rx->flags);
/* Check for poison and drop or pass the packet */
if (len == 0xdead && flags == 0xbeef) {
bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",
ring->start);
- dma_sync_single_for_device(dma_dev,
- slot->dma_addr,
- BGMAC_RX_BUF_SIZE,
- DMA_FROM_DEVICE);
+ put_page(virt_to_head_page(buf));
break;
}
- /* Omit CRC. */
- len -= ETH_FCS_LEN;
-
- /* Prepare new skb as replacement */
- err = bgmac_dma_rx_skb_for_slot(bgmac, slot);
- if (err) {
- /* Poison the old skb */
- rx->len = cpu_to_le16(0xdead);
- rx->flags = cpu_to_le16(0xbeef);
-
- dma_sync_single_for_device(dma_dev,
- slot->dma_addr,
- BGMAC_RX_BUF_SIZE,
- DMA_FROM_DEVICE);
+ if (len > BGMAC_RX_ALLOC_SIZE) {
+ bgmac_err(bgmac, "Found oversized packet at slot %d, DMA issue!\n",
+ ring->start);
+ put_page(virt_to_head_page(buf));
break;
}
- bgmac_dma_rx_setup_desc(bgmac, ring, ring->start);
- /* Unmap old skb, we'll pass it to the netfif */
- dma_unmap_single(dma_dev, old_dma_addr,
- BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ /* Omit CRC. */
+ len -= ETH_FCS_LEN;
skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
- skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
- skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
+ skb_put(skb, BGMAC_RX_FRAME_OFFSET +
+ BGMAC_RX_BUF_OFFSET + len);
+ skb_pull(skb, BGMAC_RX_FRAME_OFFSET +
+ BGMAC_RX_BUF_OFFSET);
skb_checksum_none_assert(skb);
skb->protocol = eth_type_trans(skb, bgmac->net_dev);
handled++;
} while (0);
+ bgmac_dma_rx_setup_desc(bgmac, ring, ring->start);
+
if (++ring->start >= BGMAC_RX_RING_SLOTS)
ring->start = 0;
break;
}
+ bgmac_dma_rx_update_index(bgmac, ring);
+
return handled;
}
struct bgmac_slot_info *slot;
int i;
- for (i = 0; i < ring->num_slots; i++) {
+ for (i = 0; i < BGMAC_TX_RING_SLOTS; i++) {
int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
slot = &ring->slots[i];
struct bgmac_slot_info *slot;
int i;
- for (i = 0; i < ring->num_slots; i++) {
+ for (i = 0; i < BGMAC_RX_RING_SLOTS; i++) {
slot = &ring->slots[i];
- if (!slot->buf)
+ if (!slot->dma_addr)
continue;
- if (slot->dma_addr)
- dma_unmap_single(dma_dev, slot->dma_addr,
- BGMAC_RX_BUF_SIZE,
- DMA_FROM_DEVICE);
+ dma_unmap_single(dma_dev, slot->dma_addr,
+ BGMAC_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
put_page(virt_to_head_page(slot->buf));
+ slot->dma_addr = 0;
}
}
static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
- struct bgmac_dma_ring *ring)
+ struct bgmac_dma_ring *ring,
+ int num_slots)
{
struct device *dma_dev = bgmac->core->dma_dev;
int size;
return;
/* Free ring of descriptors */
- size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+ size = num_slots * sizeof(struct bgmac_dma_desc);
dma_free_coherent(dma_dev, size, ring->cpu_base,
ring->dma_base);
}
-static void bgmac_dma_free(struct bgmac *bgmac)
+static void bgmac_dma_cleanup(struct bgmac *bgmac)
{
int i;
- for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+ for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
bgmac_dma_tx_ring_free(bgmac, &bgmac->tx_ring[i]);
- bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i]);
- }
- for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+
+ for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
bgmac_dma_rx_ring_free(bgmac, &bgmac->rx_ring[i]);
- bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i]);
- }
+}
+
+static void bgmac_dma_free(struct bgmac *bgmac)
+{
+ int i;
+
+ for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+ bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i],
+ BGMAC_TX_RING_SLOTS);
+
+ for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+ bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i],
+ BGMAC_RX_RING_SLOTS);
}
static int bgmac_dma_alloc(struct bgmac *bgmac)
for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
ring = &bgmac->tx_ring[i];
- ring->num_slots = BGMAC_TX_RING_SLOTS;
ring->mmio_base = ring_base[i];
/* Alloc ring of descriptors */
- size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+ size = BGMAC_TX_RING_SLOTS * sizeof(struct bgmac_dma_desc);
ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
&ring->dma_base,
GFP_KERNEL);
}
for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
- int j;
-
ring = &bgmac->rx_ring[i];
- ring->num_slots = BGMAC_RX_RING_SLOTS;
ring->mmio_base = ring_base[i];
/* Alloc ring of descriptors */
- size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+ size = BGMAC_RX_RING_SLOTS * sizeof(struct bgmac_dma_desc);
ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
&ring->dma_base,
GFP_KERNEL);
ring->index_base = lower_32_bits(ring->dma_base);
else
ring->index_base = 0;
-
- /* Alloc RX slots */
- for (j = 0; j < ring->num_slots; j++) {
- err = bgmac_dma_rx_skb_for_slot(bgmac, &ring->slots[j]);
- if (err) {
- bgmac_err(bgmac, "Can't allocate skb for slot in RX ring\n");
- goto err_dma_free;
- }
- }
}
return 0;
return -ENOMEM;
}
-static void bgmac_dma_init(struct bgmac *bgmac)
+static int bgmac_dma_init(struct bgmac *bgmac)
{
struct bgmac_dma_ring *ring;
- int i;
+ int i, err;
for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
ring = &bgmac->tx_ring[i];
if (ring->unaligned)
bgmac_dma_rx_enable(bgmac, ring);
- for (j = 0; j < ring->num_slots; j++)
- bgmac_dma_rx_setup_desc(bgmac, ring, j);
-
- bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
- ring->index_base +
- ring->num_slots * sizeof(struct bgmac_dma_desc));
-
ring->start = 0;
ring->end = 0;
+ for (j = 0; j < BGMAC_RX_RING_SLOTS; j++) {
+ err = bgmac_dma_rx_skb_for_slot(bgmac, &ring->slots[j]);
+ if (err)
+ goto error;
+
+ bgmac_dma_rx_setup_desc(bgmac, ring, j);
+ }
+
+ bgmac_dma_rx_update_index(bgmac, ring);
}
+
+ return 0;
+
+error:
+ bgmac_dma_cleanup(bgmac);
+ return err;
}
/**************************************************
bgmac_phy_init(bgmac);
netdev_reset_queue(bgmac->net_dev);
-
- bgmac->int_status = 0;
}
static void bgmac_chip_intrs_on(struct bgmac *bgmac)
}
/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
-static void bgmac_chip_init(struct bgmac *bgmac, bool full_init)
+static void bgmac_chip_init(struct bgmac *bgmac)
{
- struct bgmac_dma_ring *ring;
- int i;
-
/* 1 interrupt per received frame */
bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT);
bgmac_write(bgmac, BGMAC_RXMAX_LENGTH, 32 + ETHER_MAX_LEN);
- if (full_init) {
- bgmac_dma_init(bgmac);
- if (1) /* FIXME: is there any case we don't want IRQs? */
- bgmac_chip_intrs_on(bgmac);
- } else {
- for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
- ring = &bgmac->rx_ring[i];
- bgmac_dma_rx_enable(bgmac, ring);
- }
- }
+ bgmac_chip_intrs_on(bgmac);
bgmac_enable(bgmac);
}
if (!int_status)
return IRQ_NONE;
- /* Ack */
- bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
+ int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
+ if (int_status)
+ bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
/* Disable new interrupts until handling existing ones */
bgmac_chip_intrs_off(bgmac);
- bgmac->int_status = int_status;
-
napi_schedule(&bgmac->napi);
return IRQ_HANDLED;
static int bgmac_poll(struct napi_struct *napi, int weight)
{
struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
- struct bgmac_dma_ring *ring;
int handled = 0;
- if (bgmac->int_status & BGMAC_IS_TX0) {
- ring = &bgmac->tx_ring[0];
- bgmac_dma_tx_free(bgmac, ring);
- bgmac->int_status &= ~BGMAC_IS_TX0;
- }
+ /* Ack */
+ bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
- if (bgmac->int_status & BGMAC_IS_RX) {
- ring = &bgmac->rx_ring[0];
- handled += bgmac_dma_rx_read(bgmac, ring, weight);
- bgmac->int_status &= ~BGMAC_IS_RX;
- }
+ bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]);
+ handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight);
- if (bgmac->int_status) {
- bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
- bgmac->int_status = 0;
- }
+ /* Poll again if more events arrived in the meantime */
+ if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
+ return handled;
if (handled < weight) {
napi_complete(napi);
int err = 0;
bgmac_chip_reset(bgmac);
+
+ err = bgmac_dma_init(bgmac);
+ if (err)
+ return err;
+
/* Specs say about reclaiming rings here, but we do that in DMA init */
- bgmac_chip_init(bgmac, true);
+ bgmac_chip_init(bgmac);
err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED,
KBUILD_MODNAME, net_dev);
if (err < 0) {
bgmac_err(bgmac, "IRQ request error: %d!\n", err);
- goto err_out;
+ bgmac_dma_cleanup(bgmac);
+ return err;
}
napi_enable(&bgmac->napi);
phy_start(bgmac->phy_dev);
netif_carrier_on(net_dev);
-
-err_out:
- return err;
+ return 0;
}
static int bgmac_stop(struct net_device *net_dev)
free_irq(bgmac->core->irq, net_dev);
bgmac_chip_reset(bgmac);
+ bgmac_dma_cleanup(bgmac);
return 0;
}
#define BGMAC_MAX_RX_RINGS 1
#define BGMAC_TX_RING_SLOTS 128
-#define BGMAC_RX_RING_SLOTS 512 - 1 /* Why -1? Well, Broadcom does that... */
+#define BGMAC_RX_RING_SLOTS 512
#define BGMAC_RX_HEADER_LEN 28 /* Last 24 bytes are unused. Well... */
#define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */
+#define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \
+ BGMAC_RX_FRAME_OFFSET)
#define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */
#define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
-#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
+#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */
* empty.
*/
struct bgmac_dma_ring {
- u16 num_slots;
- u16 start;
- u16 end;
+ u32 start;
+ u32 end;
- u16 mmio_base;
struct bgmac_dma_desc *cpu_base;
dma_addr_t dma_base;
u32 index_base; /* Used for unaligned rings only, otherwise 0 */
+ u16 mmio_base;
bool unaligned;
struct bgmac_slot_info slots[BGMAC_RX_RING_SLOTS];
/* Int */
u32 int_mask;
- u32 int_status;
/* Current MAC state */
int mac_speed;
adap->swintr = 1;
t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A), v);
}
- t4_slow_intr_handler(adap);
+ if (adap->flags & MASTER_PF)
+ t4_slow_intr_handler(adap);
return IRQ_HANDLED;
}
*/
#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
-/*
- * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will
- * attempt to refill it.
- */
-#define FL_STARVE_THRES 4
-
/*
* Suspend an Ethernet Tx queue with fewer available descriptors than this.
* This is the same as calc_tx_descs() for a TSO packet with
* Max Tx descriptor space we allow for an Ethernet packet to be inlined
* into a WR.
*/
-#define MAX_IMM_TX_PKT_LEN 128
+#define MAX_IMM_TX_PKT_LEN 256
/*
* Max size of a WR sent through a control Tx queue.
return fl->size - 8; /* 1 descriptor = 8 buffers */
}
-static inline bool fl_starving(const struct sge_fl *fl)
+/**
+ * fl_starving - return whether a Free List is starving.
+ * @adapter: pointer to the adapter
+ * @fl: the Free List
+ *
+ * Tests the specified Free List to see whether the number of buffers
+ * available to the hardware has fallen below our "starvation"
+ * threshold.
+ */
+static inline bool fl_starving(const struct adapter *adapter,
+ const struct sge_fl *fl)
{
- return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
+ const struct sge *s = &adapter->sge;
+
+ return fl->avail - fl->pend_cred <= s->fl_starve_thres;
}
static int map_skb(struct device *dev, const struct sk_buff *skb,
unsigned int cred = q->avail;
__be64 *d = &q->desc[q->pidx];
struct rx_sw_desc *sd = &q->sdesc[q->pidx];
+ int node;
gfp |= __GFP_NOWARN;
+ node = dev_to_node(adap->pdev_dev);
if (s->fl_pg_order == 0)
goto alloc_small_pages;
* Prefer large buffers
*/
while (n) {
- pg = __dev_alloc_pages(gfp, s->fl_pg_order);
+ pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order);
if (unlikely(!pg)) {
q->large_alloc_failed++;
break; /* fall back to single pages */
alloc_small_pages:
while (n--) {
- pg = __dev_alloc_page(gfp);
+ pg = alloc_pages_node(node, gfp, 0);
if (unlikely(!pg)) {
q->alloc_failed++;
break;
q->pend_cred += cred;
ring_fl_db(adap, q);
- if (unlikely(fl_starving(q))) {
+ if (unlikely(fl_starving(adap, q))) {
smp_wmb();
set_bit(q->cntxt_id - adap->sge.egr_start,
adap->sge.starving_fl);
*/
static inline unsigned int sgl_len(unsigned int n)
{
+ /* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
+ * addresses. The DSGL Work Request starts off with a 32-bit DSGL
+ * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
+ * repeated sequences of { Length[i], Length[i+1], Address[i],
+ * Address[i+1] } (this ensures that all addresses are on 64-bit
+ * boundaries). If N is even, then Length[N+1] should be set to 0 and
+ * Address[N+1] is omitted.
+ *
+ * The following calculation incorporates all of the above. It's
+ * somewhat hard to follow but, briefly: the "+2" accounts for the
+ * first two flits which include the DSGL header, Length0 and
+ * Address0; the "(3*(n-1))/2" covers the main body of list entries (3
+ * flits for every pair of the remaining entries, and the integer
+ * division leaves one extra flit when (n-1) is odd); and finally the
+ * "+((n-1)&1)" adds the second flit that the unpaired last entry
+ * needs when (n-1) is odd.
+ */
n--;
return (3 * n) / 2 + (n & 1) + 2;
}
unsigned int flits;
int hdrlen = is_eth_imm(skb);
+ /* If the skb is small enough, we can pump it out as a work request
+ * with only immediate data. In that case we just have to have the
+ * TX Packet header plus the skb data in the Work Request.
+ */
+
if (hdrlen)
return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
+ /* Otherwise, we're going to have to construct a Scatter gather list
+ * of the skb body and fragments. We also include the flits necessary
+ * for the TX Packet Work Request and CPL. We always have a firmware
+ * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+ * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+ * message or, if we're doing a Large Send Offload, an LSO CPL message
+ * with an embedded TX Packet Write CPL message.
+ */
flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
if (skb_shinfo(skb)->gso_size)
- flits += 2;
+ flits += (sizeof(struct fw_eth_tx_pkt_wr) +
+ sizeof(struct cpl_tx_pkt_lso_core) +
+ sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+ else
+ flits += (sizeof(struct fw_eth_tx_pkt_wr) +
+ sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
return flits;
}
{
struct adapter *adap = cookie;
- t4_slow_intr_handler(adap);
+ if (adap->flags & MASTER_PF)
+ t4_slow_intr_handler(adap);
process_intrq(adap);
return IRQ_HANDLED;
}
struct adapter *adap = cookie;
t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0);
- if (t4_slow_intr_handler(adap) | process_intrq(adap))
+ if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) |
+ process_intrq(adap))
return IRQ_HANDLED;
return IRQ_NONE; /* probably shared interrupt */
}
clear_bit(id, s->starving_fl);
smp_mb__after_atomic();
- if (fl_starving(fl)) {
+ if (fl_starving(adap, fl)) {
rxq = container_of(fl, struct sge_eth_rxq, fl);
if (napi_reschedule(&rxq->rspq.napi))
fl->starving++;
#define BE_NAPI_WEIGHT 64
#define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */
#define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST)
+#define MAX_NUM_POST_ERX_DB 255u
#define MAX_VFS 30 /* Max VFs supported by BE3 FW */
#define FW_VER_LEN 32
if (rxo->rx_post_starved)
rxo->rx_post_starved = false;
do {
- notify = min(256u, posted);
+ notify = min(MAX_NUM_POST_ERX_DB, posted);
be_rxq_notify(adapter, rxq->id, notify);
posted -= notify;
} while (posted);
struct timecounter tc;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
+ struct pm_qos_request pm_qos_req;
u16 eee_advert;
};
ew32(RXDCTL(0), rxdctl | 0x3);
}
- pm_qos_update_request(&adapter->netdev->pm_qos_req, lat);
+ pm_qos_update_request(&adapter->pm_qos_req, lat);
} else {
- pm_qos_update_request(&adapter->netdev->pm_qos_req,
+ pm_qos_update_request(&adapter->pm_qos_req,
PM_QOS_DEFAULT_VALUE);
}
e1000_update_mng_vlan(adapter);
/* DMA latency requirement to workaround jumbo issue */
- pm_qos_add_request(&adapter->netdev->pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+ pm_qos_add_request(&adapter->pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
/* before we allocate an interrupt, we must be ready to handle it.
!test_bit(__E1000_TESTING, &adapter->state))
e1000e_release_hw_control(adapter);
- pm_qos_remove_request(&adapter->netdev->pm_qos_req);
+ pm_qos_remove_request(&adapter->pm_qos_req);
pm_runtime_put_sync(&pdev->dev);
config NET_VENDOR_TOSHIBA
bool "Toshiba devices"
default y
- depends on PCI && (PPC_IBM_CELL_BLADE || PPC_CELLEB || MIPS) || PPC_PS3
+ depends on PCI && (PPC_IBM_CELL_BLADE || MIPS) || PPC_PS3
---help---
If you have a network (Ethernet) card belonging to this class, say Y
and read the Ethernet-HOWTO, available from
config SPIDER_NET
tristate "Spider Gigabit Ethernet driver"
- depends on PCI && (PPC_IBM_CELL_BLADE || PPC_CELLEB)
+ depends on PCI && PPC_IBM_CELL_BLADE
select FW_LOADER
select SUNGEM_PHY
---help---
bool is_data_pkt;
bool xmit_more; /* from skb */
+ bool cp_partial; /* partial copy into send buffer */
+
u16 vlan_tci;
u16 q_idx;
/* This points to the memory after page_buf */
struct rndis_message *rndis_msg;
+ u32 rmsg_size; /* RNDIS header and PPI size */
+ u32 rmsg_pgcnt; /* page count of RNDIS header and PPI */
+
u32 total_data_buflen;
/* Points to the send/receive buffer where the ethernet frame is */
void *data;
u32 msg_size = 0;
u32 padding = 0;
u32 remain = packet->total_data_buflen % net_device->pkt_align;
+ u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
+ packet->page_buf_cnt;
/* Add padding */
- if (packet->is_data_pkt && packet->xmit_more && remain) {
+ if (packet->is_data_pkt && packet->xmit_more && remain &&
+ !packet->cp_partial) {
padding = net_device->pkt_align - remain;
packet->rndis_msg->msg_len += padding;
packet->total_data_buflen += padding;
}
- for (i = 0; i < packet->page_buf_cnt; i++) {
+ for (i = 0; i < page_count; i++) {
char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
u32 offset = packet->page_buf[i].offset;
u32 len = packet->page_buf[i].len;
struct net_device *ndev = net_device->ndev;
u64 req_id;
int ret;
+ struct hv_page_buffer *pgbuf;
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (packet->is_data_pkt) {
return -ENODEV;
if (packet->page_buf_cnt) {
+ pgbuf = packet->cp_partial ? packet->page_buf +
+ packet->rmsg_pgcnt : packet->page_buf;
ret = vmbus_sendpacket_pagebuffer(out_channel,
- packet->page_buf,
+ pgbuf,
packet->page_buf_cnt,
&nvmsg,
sizeof(struct nvsp_message),
unsigned long flag;
struct multi_send_data *msdp;
struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+ bool try_batch;
net_device = get_outbound_net_device(device);
if (!net_device)
}
packet->channel = out_channel;
packet->send_buf_index = NETVSC_INVALID_INDEX;
+ packet->cp_partial = false;
msdp = &net_device->msd[q_idx];
if (msdp->pkt)
msd_len = msdp->pkt->total_data_buflen;
- if (packet->is_data_pkt && msd_len > 0 &&
- msdp->count < net_device->max_pkt &&
- msd_len + pktlen + net_device->pkt_align <
+ try_batch = packet->is_data_pkt && msd_len > 0 && msdp->count <
+ net_device->max_pkt;
+
+ if (try_batch && msd_len + pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = msdp->pkt->send_buf_index;
+ } else if (try_batch && msd_len + packet->rmsg_size <
+ net_device->send_section_size) {
+ section_index = msdp->pkt->send_buf_index;
+ packet->cp_partial = true;
+
} else if (packet->is_data_pkt && pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
netvsc_copy_to_send_buf(net_device,
section_index, msd_len,
packet);
- if (!packet->part_of_skb) {
- skb = (struct sk_buff *)
- (unsigned long)
- packet->send_completion_tid;
-
- packet->send_completion_tid = 0;
- }
- packet->page_buf_cnt = 0;
packet->send_buf_index = section_index;
- packet->total_data_buflen += msd_len;
+
+ if (packet->cp_partial) {
+ packet->page_buf_cnt -= packet->rmsg_pgcnt;
+ packet->total_data_buflen = msd_len + packet->rmsg_size;
+ } else {
+ packet->page_buf_cnt = 0;
+ packet->total_data_buflen += msd_len;
+ if (!packet->part_of_skb) {
+ skb = (struct sk_buff *)(unsigned long)packet->
+ send_completion_tid;
+ packet->send_completion_tid = 0;
+ }
+ }
if (msdp->pkt)
netvsc_xmit_completion(msdp->pkt);
- if (packet->xmit_more) {
+ if (packet->xmit_more && !packet->cp_partial) {
msdp->pkt = packet;
msdp->count++;
} else {
}
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
- struct hv_page_buffer *pb)
+ struct hv_netvsc_packet *packet)
{
+ struct hv_page_buffer *pb = packet->page_buf;
u32 slots_used = 0;
char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
int i;
/* The packet is laid out thus:
- * 1. hdr
+ * 1. hdr: RNDIS header and PPI
* 2. skb linear data
* 3. skb fragment data
*/
offset_in_page(hdr),
len, &pb[slots_used]);
+ packet->rmsg_size = len;
+ packet->rmsg_pgcnt = slots_used;
+
slots_used += fill_pg_buf(virt_to_page(data),
offset_in_page(data),
skb_headlen(skb), &pb[slots_used]);
rndis_msg->msg_len += rndis_msg_size;
packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
- skb, &page_buf[0]);
+ skb, packet);
ret = netvsc_send(net_device_ctx->device_ctx, packet);
config BROADCOM_PHY
tristate "Drivers for Broadcom PHYs"
---help---
- Currently supports the BCM5411, BCM5421, BCM5461, BCM5464, BCM5481
- and BCM5482 PHYs.
+ Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
+ BCM5481 and BCM5482 PHYs.
config BCM63XX_PHY
tristate "Drivers for Broadcom 63xx SOCs internal PHY"
.ack_interrupt = bcm54xx_ack_interrupt,
.config_intr = bcm54xx_config_intr,
.driver = { .owner = THIS_MODULE },
+}, {
+ .phy_id = PHY_ID_BCM54616S,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM54616S",
+ .features = PHY_GBIT_FEATURES |
+ SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+ .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+ .config_init = bcm54xx_config_init,
+ .config_aneg = genphy_config_aneg,
+ .read_status = genphy_read_status,
+ .ack_interrupt = bcm54xx_ack_interrupt,
+ .config_intr = bcm54xx_config_intr,
+ .driver = { .owner = THIS_MODULE },
}, {
.phy_id = PHY_ID_BCM5464,
.phy_id_mask = 0xfffffff0,
{ PHY_ID_BCM5411, 0xfffffff0 },
{ PHY_ID_BCM5421, 0xfffffff0 },
{ PHY_ID_BCM5461, 0xfffffff0 },
+ { PHY_ID_BCM54616S, 0xfffffff0 },
{ PHY_ID_BCM5464, 0xfffffff0 },
{ PHY_ID_BCM5482, 0xfffffff0 },
{ PHY_ID_BCM5482, 0xfffffff0 },
* especially now that control transfers can be queued.
*/
static void
-kevent (struct work_struct *work)
+usbnet_deferred_kevent (struct work_struct *work)
{
struct usbnet *dev =
container_of(work, struct usbnet, kevent);
skb_queue_head_init(&dev->rxq_pause);
dev->bh.func = usbnet_bh;
dev->bh.data = (unsigned long) dev;
- INIT_WORK (&dev->kevent, kevent);
+ INIT_WORK (&dev->kevent, usbnet_deferred_kevent);
init_usb_anchor(&dev->deferred);
dev->delay.function = usbnet_bh;
dev->delay.data = (unsigned long) dev;
}
}
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb)) {
- err = -EINVAL;
- goto err;
- }
-
skb_scrub_packet(skb, xnet);
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
goto err;
}
+ skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ if (IS_ERR(skb)) {
+ err = -EINVAL;
+ goto err;
+ }
+
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
}
}
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
if (WARN_ON(!skb))
return -ENOMEM;
+ skb = iptunnel_handle_offloads(skb, udp_sum, type);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = md->vni;
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
-#include <linux/aio.h>
#include <linux/errno.h>
#include <linux/mtio.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
#include <linux/ratelimit.h>
+#include <linux/uio.h>
#include "scsi.h"
#include <scsi/scsi_dbg.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/hid.h>
#include <linux/module.h>
+#include <linux/uio.h>
#include <asm/unaligned.h>
#include <linux/usb/composite.h>
unuse_mm(io_data->mm);
}
- aio_complete(io_data->kiocb, ret, ret);
+ io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
- if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+ if (io_data->ffs->ffs_eventfd &&
+ !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
eventfd_signal(io_data->ffs->ffs_eventfd, 1);
usb_ep_free_request(io_data->ep, io_data->req);
#include <linux/poll.h>
#include <linux/mmu_context.h>
#include <linux/aio.h>
+#include <linux/uio.h>
#include <linux/device.h>
#include <linux/moduleparam.h>
ret = -EFAULT;
/* completing the iocb can drop the ctx and mm, don't touch mm after */
- aio_complete(iocb, ret, ret);
+ iocb->ki_complete(iocb, ret, ret);
kfree(priv->buf);
kfree(priv->to_free);
kfree(priv);
iocb->private = NULL;
/* aio_complete() reports bytes-transferred _and_ faults */
- aio_complete(iocb, req->actual ? req->actual : req->status,
+
+ iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
req->status);
} else {
/* ep_copy_to_user() won't report both; we hide some faults */
iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
iov_iter_advance(&msg.msg_iter, hdr_size);
/* Sanity check */
- if (!iov_iter_count(&msg.msg_iter)) {
+ if (!msg_data_left(&msg)) {
vq_err(vq, "Unexpected header len for TX: "
"%zd expected %zd\n",
len, hdr_size);
break;
}
- len = iov_iter_count(&msg.msg_iter);
+ len = msg_data_left(&msg);
zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
&& (nvq->upend_idx + 1) % UIO_MAXIOV !=
#include <linux/pagemap.h>
#include <linux/idr.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
* affs regular file handling primitives
*/
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "affs.h"
static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
int afs_abort_to_error(u32 abort_code)
{
switch (abort_code) {
+ /* low errno codes inserted into abort namespace */
case 13: return -EACCES;
case 27: return -EFBIG;
case 30: return -EROFS;
+
+ /* VICE "special error" codes; 101 - 111 */
case VSALVAGE: return -EIO;
case VNOVNODE: return -ENOENT;
case VNOVOL: return -ENOMEDIUM;
case VOVERQUOTA: return -EDQUOT;
case VBUSY: return -EBUSY;
case VMOVED: return -ENXIO;
- case 0x2f6df0a: return -EWOULDBLOCK;
+
+ /* Unified AFS error table; ET "uae" == 0x2f6df00 */
+ case 0x2f6df00: return -EPERM;
+ case 0x2f6df01: return -ENOENT;
+ case 0x2f6df04: return -EIO;
+ case 0x2f6df0a: return -EAGAIN;
+ case 0x2f6df0b: return -ENOMEM;
case 0x2f6df0c: return -EACCES;
case 0x2f6df0f: return -EBUSY;
case 0x2f6df10: return -EEXIST;
case 0x2f6df11: return -EXDEV;
+ case 0x2f6df12: return -ENODEV;
case 0x2f6df13: return -ENOTDIR;
case 0x2f6df14: return -EISDIR;
case 0x2f6df15: return -EINVAL;
case 0x2f6df23: return -ENAMETOOLONG;
case 0x2f6df24: return -ENOLCK;
case 0x2f6df26: return -ENOTEMPTY;
+ case 0x2f6df28: return -EWOULDBLOCK;
+ case 0x2f6df69: return -ENOTCONN;
+ case 0x2f6df6c: return -ETIMEDOUT;
case 0x2f6df78: return -EDQUOT;
+ /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
case RXKADPACKETSHORT: return -EPROTO;
case RXKADLEVELFAIL: return -EKEYREJECTED;
void afs_send_empty_reply(struct afs_call *call)
{
struct msghdr msg;
- struct kvec iov[1];
_enter("");
- iov[0].iov_base = NULL;
- iov[0].iov_len = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
-#include <linux/aio.h>
#include "internal.h"
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
unsigned id;
};
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED ((void *) (~0ULL))
+
+struct aio_kiocb {
+ struct kiocb common;
+
+ struct kioctx *ki_ctx;
+ kiocb_cancel_fn *ki_cancel;
+
+ struct iocb __user *ki_user_iocb; /* user's aiocb */
+ __u64 ki_user_data; /* user's data for completion */
+
+ struct list_head ki_list; /* the aio core uses this
+ * for cancellation */
+
+ /*
+ * If the aio_resfd field of the userspace iocb is not zero,
+ * this is the underlying eventfd context to deliver events to.
+ */
+ struct eventfd_ctx *ki_eventfd;
+};
+
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
- kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);
- return cancel(kiocb);
+ return cancel(&kiocb->common);
}
static void free_ioctx(struct work_struct *work)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
- struct kiocb *req;
+ struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
- struct kiocb, ki_list);
+ struct aio_kiocb, ki_list);
list_del_init(&req->ki_list);
kiocb_cancel(req);
return 0;
}
-/* wait_on_sync_kiocb:
- * Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
- while (!req->ki_ctx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (req->ki_ctx)
- break;
- io_schedule();
- }
- __set_current_state(TASK_RUNNING);
- return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
/*
* exit_aio: called when the last user of mm goes away. At this point, there is
* no way for any new requests to be submited or any of the io_* syscalls to be
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
return NULL;
}
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
{
- if (req->ki_filp)
- fput(req->ki_filp);
+ if (req->common.ki_filp)
+ fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
- if (is_sync_kiocb(iocb)) {
- iocb->ki_user_data = res;
- smp_wmb();
- iocb->ki_ctx = ERR_PTR(-EXDEV);
- wake_up_process(iocb->ki_obj.tsk);
- return;
- }
+ BUG_ON(is_sync_kiocb(kiocb));
if (iocb->ki_list.next) {
unsigned long flags;
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+ event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+ ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);
/* after flagging the request as done, we
percpu_ref_put(&ctx->reqs);
}
-EXPORT_SYMBOL(aio_complete);
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
+ size_t *len,
struct iovec **iovec,
bool compat)
{
ssize_t ret;
- *nr_segs = kiocb->ki_nbytes;
+ *nr_segs = *len;
#ifdef CONFIG_COMPAT
if (compat)
if (ret < 0)
return ret;
- /* ki_nbytes now reflect bytes instead of segs */
- kiocb->ki_nbytes = ret;
+ /* len now reflects bytes instead of segs */
+ *len = ret;
return 0;
}
static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
+ size_t len,
struct iovec *iovec)
{
- if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
+ if (unlikely(!access_ok(!rw, buf, len)))
return -EFAULT;
iovec->iov_base = buf;
- iovec->iov_len = kiocb->ki_nbytes;
+ iovec->iov_len = len;
*nr_segs = 1;
return 0;
}
* Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, bool compat)
+ char __user *buf, size_t len, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
if (!rw_op && !iter_op)
return -EINVAL;
- ret = (opcode == IOCB_CMD_PREADV ||
- opcode == IOCB_CMD_PWRITEV)
- ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
- &iovec, compat)
- : aio_setup_single_vector(req, rw, buf, &nr_segs,
- iovec);
+ if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+ ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
+ &len, &iovec, compat);
+ else
+ ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
+ len, iovec);
if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ ret = rw_verify_area(rw, file, &req->ki_pos, len);
if (ret < 0) {
if (iovec != inline_vecs)
kfree(iovec);
return ret;
}
- req->ki_nbytes = ret;
+ len = ret;
/* XXX: move/kill - rw_verify_area()? */
/* This matches the pread()/pwrite() logic */
file_start_write(file);
if (iter_op) {
- iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+ iov_iter_init(&iter, rw, iovec, nr_segs, len);
ret = iter_op(req, &iter);
} else {
ret = rw_op(req, iovec, nr_segs, req->ki_pos);
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
ssize_t ret;
/* enforce forwards compatibility on users */
if (unlikely(!req))
return -EAGAIN;
- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp)) {
+ req->common.ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
+ req->common.ki_pos = iocb->aio_offset;
+ req->common.ki_complete = aio_complete;
+ req->common.ki_flags = 0;
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
req->ki_eventfd = NULL;
goto out_put_req;
}
+
+ req->common.ki_flags |= IOCB_EVENTFD;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
goto out_put_req;
}
- req->ki_obj.user = user_iocb;
+ req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
- req->ki_pos = iocb->aio_offset;
- req->ki_nbytes = iocb->aio_nbytes;
- ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+ ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
+ iocb->aio_nbytes,
compat);
if (ret)
goto out_put_req;
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
- u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
- struct list_head *pos;
+ struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
return NULL;
/* TODO: use a hash or array, this sucks. */
- list_for_each(pos, &ctx->active_reqs) {
- struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb)
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
struct io_event __user *, result)
{
struct kioctx *ctx;
- struct kiocb *kiocb;
+ struct aio_kiocb *kiocb;
u32 key;
int ret;
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
#include "bfs.h"
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
-#include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include "super.h"
{
struct file *filp = iocb->ki_filp;
struct ceph_file_info *fi = filp->private_data;
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *pinned_page = NULL;
#include <linux/uio.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
-#include <linux/aio.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
ret = err;
}
- aio_complete(dio->iocb, ret, 0);
+ dio->iocb->ki_complete(dio->iocb, ret, 0);
}
kmem_cache_free(dio_cache, dio);
* operation. AIO can if it was a broken operation described above or
* in fact if all the bios race to complete before we get here. In
* that case dio_complete() translates the EIOCBQUEUED into the proper
- * return code that the caller will hand to aio_complete().
+ * return code that the caller will hand to ->ki_complete().
*
* This is managed by the bio_lock instead of being an atomic_t so that
* completion paths can drop their ref and use the remaining count to
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
-#include <linux/aio.h>
#include "ecryptfs_kernel.h"
/**
struct file *file = iocb->ki_filp;
rc = generic_file_read_iter(iocb, to);
- /*
- * Even though this is a async interface, we need to wait
- * for IO to finish to update atime
- */
- if (-EIOCBQUEUED == rc)
- rc = wait_on_sync_kiocb(iocb);
if (rc >= 0) {
path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
touch_atime(path);
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "ext2.h"
#include "acl.h"
#include "xattr.h"
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "ext3.h"
#include "xattr.h"
#include "acl.h"
#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
-#include <linux/aio.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
+#include <linux/uio.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
* (sct@redhat.com), 1993, 1998
*/
-#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
+#include <linux/uio.h>
#include <trace/events/ext4.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
-#include <linux/aio.h>
#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
+#include <linux/uio.h>
#include "f2fs.h"
#include "node.h"
#include <linux/mpage.h>
#include <linux/buffer_head.h>
#include <linux/mount.h>
-#include <linux/aio.h>
#include <linux/vfs.h>
#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
-#include <linux/aio.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/module.h>
+#include <linux/uio.h>
#include "fuse_i.h"
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
-#include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
+#include <linux/uio.h>
static const struct file_operations fuse_direct_io_file_operations;
}
}
+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+ if (io->err)
+ return io->err;
+
+ if (io->bytes >= 0 && io->write)
+ return -EIO;
+
+ return io->bytes < 0 ? io->size : io->bytes;
+}
+
/**
* In case of short read, the caller sets 'pos' to the position of
* actual end of fuse request in IO request. Otherwise, if bytes_requested
*/
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
+ bool is_sync = is_sync_kiocb(io->iocb);
int left;
spin_lock(&io->lock);
io->bytes = pos;
left = --io->reqs;
+ if (!left && is_sync)
+ complete(io->done);
spin_unlock(&io->lock);
- if (!left) {
- long res;
+ if (!left && !is_sync) {
+ ssize_t res = fuse_get_res_by_io(io);
- if (io->err)
- res = io->err;
- else if (io->bytes >= 0 && io->write)
- res = -EIO;
- else {
- res = io->bytes < 0 ? io->size : io->bytes;
+ if (res >= 0) {
+ struct inode *inode = file_inode(io->iocb->ki_filp);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
- if (!is_sync_kiocb(io->iocb)) {
- struct inode *inode = file_inode(io->iocb->ki_filp);
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- spin_lock(&fc->lock);
- fi->attr_version = ++fc->attr_version;
- spin_unlock(&fc->lock);
- }
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ spin_unlock(&fc->lock);
}
- aio_complete(io->iocb, res, 0);
+ io->iocb->ki_complete(io->iocb, res, 0);
kfree(io);
}
}
fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
+ DECLARE_COMPLETION_ONSTACK(wait);
ssize_t ret = 0;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
io->async = false;
+ if (io->async && is_sync_kiocb(iocb))
+ io->done = &wait;
+
if (rw == WRITE)
ret = __fuse_direct_write(io, iter, &pos);
else
if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
- ret = wait_on_sync_kiocb(iocb);
- } else {
- kfree(io);
+ wait_for_completion(&wait);
+ ret = fuse_get_res_by_io(io);
}
+ kfree(io);
+
if (rw == WRITE) {
if (ret > 0)
fuse_write_update_size(inode, pos);
int err;
struct kiocb *iocb;
struct file *file;
+ struct completion *done;
};
/**
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <trace/events/writeback.h>
#include "gfs2.h"
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
-#include <linux/aio.h>
#include <linux/delay.h>
#include "gfs2.h"
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "hfs_fs.h"
#include "btree.h"
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
+#include <linux/uio.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
return -EINVAL;
#else
- VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+ VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (rw == READ)
return nfs_file_direct_read(iocb, iter, pos);
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
- aio_complete(dreq->iocb, res, 0);
+ dreq->iocb->ki_complete(dreq->iocb, res, 0);
}
complete_all(&dreq->completion);
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
-#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/swap.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include <asm/page.h>
#include <asm/uaccess.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/log2.h>
-#include <linux/aio.h>
#include "aops.h"
#include "attrib.h"
#include <linux/mpage.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
+#include <linux/uio.h>
#include <cluster/masklog.h>
#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H
-#include <linux/aio.h>
+#include <linux/fs.h>
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
file->f_path.dentry->d_name.name,
(unsigned int)from->nr_segs); /* GRRRRR */
- if (iocb->ki_nbytes == 0)
+ if (count == 0)
return 0;
appending = file->f_flags & O_APPEND ? 1 : 0;
}
can_do_direct = direct_io;
- ret = ocfs2_prepare_inode_for_write(file, ppos,
- iocb->ki_nbytes, appending,
+ ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
&can_do_direct, &has_refcount);
if (ret < 0) {
mlog_errno(ret);
}
if (direct_io && !is_sync_kiocb(iocb))
- unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
- *ppos);
+ unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
/*
* We can't complete the direct I/O as requested, fall back to
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
-#include <linux/aio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = iov_iter_count(iter);
iter->type |= READ;
ret = file->f_op->read_iter(&kiocb, iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
-
+ BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
return ret;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = iov_iter_count(iter);
iter->type |= WRITE;
ret = file->f_op->write_iter(&kiocb, iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
-
+ BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
return ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
iov_iter_init(&iter, READ, &iov, 1, len);
ret = filp->f_op->read_iter(&kiocb, &iter);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
iov_iter_init(&iter, WRITE, &iov, 1, len);
ret = filp->f_op->write_iter(&kiocb, &iter);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
iov_iter_init(&iter, rw, iov, nr_segs, len);
ret = fn(&kiocb, &iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/compat.h>
-#include <linux/aio.h>
#include "internal.h"
/*
*/
#include "ubifs.h"
-#include <linux/aio.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "udf_i.h"
#include "udf_sb.h"
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
int err, pos;
- size_t count = iocb->ki_nbytes;
+ size_t count = iov_iter_count(from);
struct udf_inode_info *iinfo = UDF_I(inode);
mutex_lock(&inode->i_mutex);
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "udf_i.h"
#include "udf_sb.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include "xfs_icache.h"
#include "xfs_pnfs.h"
-#include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
#ifndef __LINUX__AIO_H
#define __LINUX__AIO_H
-#include <linux/list.h>
-#include <linux/workqueue.h>
#include <linux/aio_abi.h>
-#include <linux/uio.h>
-#include <linux/rcupdate.h>
-
-#include <linux/atomic.h>
struct kioctx;
struct kiocb;
+struct mm_struct;
#define KIOCB_KEY 0
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED ((void *) (~0ULL))
-
typedef int (kiocb_cancel_fn)(struct kiocb *);
-struct kiocb {
- struct file *ki_filp;
- struct kioctx *ki_ctx; /* NULL for sync ops */
- kiocb_cancel_fn *ki_cancel;
- void *private;
-
- union {
- void __user *user;
- struct task_struct *tsk;
- } ki_obj;
-
- __u64 ki_user_data; /* user's data for completion */
- loff_t ki_pos;
- size_t ki_nbytes; /* copy of iocb->aio_nbytes */
-
- struct list_head ki_list; /* the aio core uses this
- * for cancellation */
-
- /*
- * If the aio_resfd field of the userspace iocb is not zero,
- * this is the underlying eventfd context to deliver events to.
- */
- struct eventfd_ctx *ki_eventfd;
-};
-
-static inline bool is_sync_kiocb(struct kiocb *kiocb)
-{
- return kiocb->ki_ctx == NULL;
-}
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
- *kiocb = (struct kiocb) {
- .ki_ctx = NULL,
- .ki_filp = filp,
- .ki_obj.tsk = current,
- };
-}
-
/* prototypes */
#ifdef CONFIG_AIO
-extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern void aio_complete(struct kiocb *iocb, long res, long res2);
-struct mm_struct;
extern void exit_aio(struct mm_struct *mm);
extern long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user *__user *iocbpp, bool compat);
void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
#else
-static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
-struct mm_struct;
static inline void exit_aio(struct mm_struct *mm) { }
static inline long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user * __user *iocbpp,
kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */
-static inline struct kiocb *list_kiocb(struct list_head *h)
-{
- return list_entry(h, struct kiocb, ki_list);
-}
-
/* for sysctl: */
extern unsigned long aio_nr;
extern unsigned long aio_max_nr;
#define PHY_ID_BCM5421 0x002060e0
#define PHY_ID_BCM5464 0x002060b0
#define PHY_ID_BCM5461 0x002060c0
+#define PHY_ID_BCM54616S 0x03625d10
#define PHY_ID_BCM57780 0x03625d90
#define PHY_ID_BCM7250 0xae025280
struct address_space;
struct writeback_control;
+#define IOCB_EVENTFD (1 << 0)
+
+struct kiocb {
+ struct file *ki_filp;
+ loff_t ki_pos;
+ void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+ void *private;
+ int ki_flags;
+};
+
+static inline bool is_sync_kiocb(struct kiocb *kiocb)
+{
+ return kiocb->ki_complete == NULL;
+}
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+ *kiocb = (struct kiocb) {
+ .ki_filp = filp,
+ };
+}
+
/*
* "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
int sock_create_kern(int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
void sock_release(struct socket *sock);
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int sock_sendmsg(struct socket *sock, struct msghdr *msg);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
* @base_addr: Device I/O address
* @irq: Device IRQ number
*
+ * @carrier_changes: Stats to monitor carrier on<->off transitions
+ *
* @state: Generic network queuing layer state, see netdev_state_t
* @dev_list: The global list of network devices
* @napi_list: List entry, that is used for polling napi devices
* @tx_dropped: Dropped packets by core network,
* do not use this in drivers
*
- * @carrier_changes: Stats to monitor carrier on<->off transitions
- *
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
* see <net/iw_handler.h> for details.
* @dev_port: Used to differentiate devices that share
* the same function
* @addr_list_lock: XXX: need comments on this one
- * @uc: unicast mac addresses
- * @mc: multicast mac addresses
- * @dev_addrs: list of device hw addresses
- * @queues_kset: Group of all Kobjects in the Tx and RX queues
* @uc_promisc: Counter, that indicates, that promiscuous mode
* has been enabled due to the need to listen to
* additional unicast addresses in a device that
* does not implement ndo_set_rx_mode()
+ * @uc: unicast mac addresses
+ * @mc: multicast mac addresses
+ * @dev_addrs: list of device hw addresses
+ * @queues_kset: Group of all Kobjects in the Tx and RX queues
* @promiscuity: Number of times, the NIC is told to work in
* Promiscuous mode, if it becomes 0 the NIC will
* exit from working in Promiscuous mode
* @ingress_queue: XXX: need comments on this one
* @broadcast: hw bcast address
*
+ * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts,
+ * indexed by RX queue number. Assigned by driver.
+ * This must only be set if the ndo_rx_flow_steer
+ * operation is defined
+ * @index_hlist: Device index hash chain
+ *
* @_tx: Array of TX queues
* @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time
* @real_num_tx_queues: Number of TX queues currently active in device
*
* @xps_maps: XXX: need comments on this one
*
- * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts,
- * indexed by RX queue number. Assigned by driver.
- * This must only be set if the ndo_rx_flow_steer
- * operation is defined
- *
* @trans_start: Time (in jiffies) of last Tx
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog ( see dev_watchdog() )
*
* @pcpu_refcnt: Number of references to this device
* @todo_list: Delayed register/unregister
- * @index_hlist: Device index hash chain
* @link_watch_list: XXX: need comments on this one
*
* @reg_state: Register/unregister state machine
unsigned long base_addr;
int irq;
+ atomic_t carrier_changes;
+
/*
* Some hardware also needs these fields (state,dev_list,
* napi_list,unreg_list,close_list) but they are not
atomic_long_t rx_dropped;
atomic_long_t tx_dropped;
- atomic_t carrier_changes;
-
#ifdef CONFIG_WIRELESS_EXT
const struct iw_handler_def * wireless_handlers;
struct iw_public_data * wireless_data;
unsigned short dev_id;
unsigned short dev_port;
spinlock_t addr_list_lock;
+ unsigned char name_assign_type;
+ bool uc_promisc;
struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc;
struct netdev_hw_addr_list dev_addrs;
#ifdef CONFIG_SYSFS
struct kset *queues_kset;
#endif
-
- unsigned char name_assign_type;
-
- bool uc_promisc;
unsigned int promiscuity;
unsigned int allmulti;
struct netdev_queue __rcu *ingress_queue;
unsigned char broadcast[MAX_ADDR_LEN];
-
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rx_cpu_rmap;
+#endif
+ struct hlist_node index_hlist;
/*
* Cache lines mostly used on transmit path
struct Qdisc *qdisc;
unsigned long tx_queue_len;
spinlock_t tx_global_lock;
+ int watchdog_timeo;
#ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_maps;
#endif
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rx_cpu_rmap;
-#endif
/* These may be needed for future network-power-down code. */
*/
unsigned long trans_start;
- int watchdog_timeo;
struct timer_list watchdog_timer;
int __percpu *pcpu_refcnt;
struct list_head todo_list;
- struct hlist_node index_hlist;
struct list_head link_watch_list;
enum { NETREG_UNINITIALIZED=0,
#endif
struct phy_device *phydev;
struct lock_class_key *qdisc_tx_busylock;
- struct pm_qos_request pm_qos_req;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
struct netlink_notify {
struct net *net;
- int portid;
+ u32 portid;
int protocol;
};
return rtnl_dereference(dev->ingress_queue);
}
-extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
+
+#ifdef CONFIG_NET_CLS_ACT
+void net_inc_ingress_queue(void);
+void net_dec_ingress_queue(void);
+#else
+/* Without CONFIG_NET_CLS_ACT there is nothing to account: no-op stub. */
+static inline void net_inc_ingress_queue(void) { }
+
+/* Without CONFIG_NET_CLS_ACT there is nothing to account: no-op stub. */
+static inline void net_dec_ingress_queue(void) { }
+#endif
extern void rtnetlink_init(void);
extern void __rtnl_unlock(void);
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
}
+/* Number of bytes still left in @msg's data iterator (msg->msg_iter). */
+static inline size_t msg_data_left(struct msghdr *msg)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+
+	return iov_iter_count(iter);
+}
+
/* "Socket"-level control message types: */
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+int import_iovec(int type, const struct iovec __user * uvector,
+ unsigned nr_segs, unsigned fast_segs,
+ struct iovec **iov, struct iov_iter *i);
+
+#ifdef CONFIG_COMPAT
+struct compat_iovec;
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+ unsigned nr_segs, unsigned fast_segs,
+ struct iovec **iov, struct iov_iter *i);
+#endif
+
+int import_single_range(int type, void __user *buf, size_t len,
+ struct iovec *iov, struct iov_iter *i);
+
#endif
#define compat_mmsghdr mmsghdr
#endif /* defined(CONFIG_COMPAT) */
-ssize_t get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
+int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
struct sockaddr __user **, struct iovec **);
asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *,
unsigned int);
struct inet_hashinfo;
-#define INET_TWDR_RECYCLE_SLOTS_LOG 5
-#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
-
-/*
- * If time > 4sec, it is "slow" path, no recycling is required,
- * so that we select tick to get range about 4 seconds.
- */
-#if HZ <= 16 || HZ > 4096
-# error Unsupported: HZ <= 16 or HZ > 4096
-#elif HZ <= 32
-# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 64
-# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 128
-# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 256
-# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 512
-# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 1024
-# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#elif HZ <= 2048
-# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#else
-# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
-#endif
-
-static inline u32 inet_tw_time_stamp(void)
-{
- return jiffies;
-}
-
-/* TIME_WAIT reaping mechanism. */
-#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
-
-#define INET_TWDR_TWKILL_QUOTA 100
-
struct inet_timewait_death_row {
- /* Short-time timewait calendar */
- int twcal_hand;
- unsigned long twcal_jiffie;
- struct timer_list twcal_timer;
- struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
-
- spinlock_t death_lock;
- int tw_count;
- int period;
- u32 thread_slots;
- struct work_struct twkill_work;
- struct timer_list tw_timer;
- int slot;
- struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
- struct inet_hashinfo *hashinfo;
+ atomic_t tw_count; /* scheduled timewait sockets: incremented in inet_twsk_schedule(), decremented in inet_twsk_kill() */
+
+ struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; /* hash tables that inet_twsk_kill() unlinks the socket from */
int sysctl_tw_recycle;
int sysctl_max_tw_buckets; /* inet_twsk_alloc() returns NULL once tw_count reaches this */
};
-void inet_twdr_hangman(unsigned long data);
-void inet_twdr_twkill_work(struct work_struct *work);
-void inet_twdr_twcal_tick(unsigned long data);
-
struct inet_bind_bucket;
/*
__be16 tw_sport;
kmemcheck_bitfield_begin(flags);
/* And these are ours. */
- unsigned int tw_pad0 : 1, /* 1 bit hole */
+ unsigned int tw_kill : 1,
tw_transparent : 1,
tw_flowlabel : 20,
tw_pad : 2, /* 2 bits hole */
tw_tos : 8;
kmemcheck_bitfield_end(flags);
- u32 tw_ttd;
+ struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
- struct hlist_node tw_death_node;
+ struct inet_timewait_death_row *tw_dr;
};
#define tw_tclass tw_tos
-static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
-{
- return !hlist_unhashed(&tw->tw_death_node);
-}
-
-static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
-{
- tw->tw_death_node.pprev = NULL;
-}
-
-static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
-{
- __hlist_del(&tw->tw_death_node);
- inet_twsk_dead_node_init(tw);
-}
-
-static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
-{
- if (inet_twsk_dead_hashed(tw)) {
- __inet_twsk_del_dead_node(tw);
- return 1;
- }
- return 0;
-}
-
-#define inet_twsk_for_each(tw, node, head) \
- hlist_nulls_for_each_entry(tw, node, head, tw_node)
-
-#define inet_twsk_for_each_inmate(tw, jail) \
- hlist_for_each_entry(tw, jail, tw_death_node)
-
-#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \
- hlist_for_each_entry_safe(tw, safe, jail, tw_death_node)
-
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
{
return (struct inet_timewait_sock *)sk;
struct inet_hashinfo *hashinfo);
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
+ struct inet_timewait_death_row *dr,
const int state);
void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
struct inet_hashinfo *hashinfo);
-void inet_twsk_schedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr,
- const int timeo, const int timewait_len);
-void inet_twsk_deschedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr);
+void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
+void inet_twsk_deschedule(struct inet_timewait_sock *tw);
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family);
#include <linux/page_counter.h>
#include <linux/memcontrol.h>
#include <linux/static_key.h>
-#include <linux/aio.h>
#include <linux/sched.h>
#include <linux/filter.h>
#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */
#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection security response (CLNT->SRVR) */
#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */
-#define RXRPC_N_PACKET_TYPES 9 /* number of packet types (incl type 0) */
+#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */
+#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */
uint8_t flags; /* packet flags */
#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
-#include <linux/aio.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
#include <linux/kdb.h>
#include <linux/irq_work.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
int i;
int level = default_message_loglevel;
int facility = 1; /* LOG_USER */
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(from);
ssize_t ret = len;
if (len > LOG_LINE_MAX)
*/
#include <linux/module.h>
+#include <linux/aio.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
flags);
}
EXPORT_SYMBOL(dup_iter);
+
+/*
+ * Copy and validate a user iovec array, then initialise @i over it.
+ *
+ * On entry *@iov points at a caller-provided array of @fast_segs entries.
+ * On success returns 0 and sets *@iov to the array the caller must
+ * kfree(), or to NULL when the caller's own array was used.  On failure
+ * returns the negative value from rw_copy_check_uvector(), frees any
+ * array that is not the caller's, and sets *@iov to NULL.
+ */
+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	struct iovec *vec;
+	ssize_t total;
+
+	total = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				      *iov, &vec);
+	if (total >= 0) {
+		iov_iter_init(i, type, vec, nr_segs, total);
+		*iov = (vec != *iov) ? vec : NULL;
+		return 0;
+	}
+
+	/* Failure: release the array unless it is the caller's own. */
+	if (vec != *iov)
+		kfree(vec);
+	*iov = NULL;
+	return total;
+}
+EXPORT_SYMBOL(import_iovec);
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+/*
+ * Compat counterpart of import_iovec(): copy and validate a user array of
+ * struct compat_iovec, then initialise @i over the result.
+ *
+ * On success returns 0 and sets *@iov to the array the caller must
+ * kfree(), or to NULL when the caller's own array was used.  On failure
+ * returns the negative value from compat_rw_copy_check_uvector(), frees
+ * any array that is not the caller's, and sets *@iov to NULL.
+ */
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	struct iovec *vec;
+	ssize_t total;
+
+	total = compat_rw_copy_check_uvector(type, uvector, nr_segs,
+					     fast_segs, *iov, &vec);
+	if (total >= 0) {
+		iov_iter_init(i, type, vec, nr_segs, total);
+		*iov = (vec != *iov) ? vec : NULL;
+		return 0;
+	}
+
+	/* Failure: release the array unless it is the caller's own. */
+	if (vec != *iov)
+		kfree(vec);
+	*iov = NULL;
+	return total;
+}
+#endif
+
+/*
+ * Describe one user buffer as a single-segment iov_iter.
+ *
+ * The length is clamped to MAX_RW_COUNT before the access_ok() check.
+ * Fills in @iov and initialises @i over it; returns 0 on success or
+ * -EFAULT when access_ok() rejects the range.
+ */
+int import_single_range(int rw, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i)
+{
+	size_t count = len > MAX_RW_COUNT ? MAX_RW_COUNT : len;
+
+	if (unlikely(!access_ok(!rw, buf, count)))
+		return -EFAULT;
+
+	iov->iov_base = buf;
+	iov->iov_len = count;
+	iov_iter_init(i, rw, iov, 1, count);
+	return 0;
+}
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
-#include <linux/aio.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
-#include <linux/aio.h>
#include <linux/blkdev.h>
+#include <linux/uio.h>
#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags,
iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
init_sync_kiocb(&kiocb, swap_file);
kiocb.ki_pos = page_file_offset(page);
- kiocb.ki_nbytes = PAGE_SIZE;
set_page_writeback(page);
unlock_page(page);
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
static struct vfsmount *shm_mnt;
#include <asm/uaccess.h>
#include <net/compat.h>
-ssize_t get_compat_msghdr(struct msghdr *kmsg,
- struct compat_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
{
compat_uptr_t uaddr, uiov, tmp3;
compat_size_t nr_segs;
kmsg->msg_iocb = NULL;
- err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE,
- compat_ptr(uiov), nr_segs,
- UIO_FASTIOV, *iov, iov);
- if (err >= 0)
- iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
- *iov, nr_segs, err);
- return err;
+ return compat_import_iovec(save_addr ? READ : WRITE,
+ compat_ptr(uiov), nr_segs,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
}
/* Bleech... */
if (!chunk)
return 0;
- if (iov_iter_count(&msg->msg_iter) < chunk) {
+ if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
goto csum_error;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+#ifdef CONFIG_NET_CLS_ACT
+static struct static_key ingress_needed __read_mostly;
+
+void net_inc_ingress_queue(void)
+{
+ static_key_slow_inc(&ingress_needed); /* bump the static key that gates the handle_ing() call on the receive path */
+}
+EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
+
+void net_dec_ingress_queue(void)
+{
+ static_key_slow_dec(&ingress_needed); /* drop one count on the static key that gates the handle_ing() call on the receive path */
+}
+EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
+#endif
+
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
- goto out;
+ return skb;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
return NULL;
}
-out:
- skb->tc_verd = 0;
return skb;
}
#endif
skip_taps:
#ifdef CONFIG_NET_CLS_ACT
- skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto unlock;
+ if (static_key_false(&ingress_needed)) {
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
+ goto unlock;
+ }
+
+ skb->tc_verd = 0;
ncls:
#endif
-
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
.hashinfo = &dccp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&dccp_death_row),
- .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&dccp_death_row),
};
EXPORT_SYMBOL_GPL(dccp_death_row);
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
+ struct inet_timewait_sock *tw;
- if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &dccp_death_row, state);
if (tw != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
- inet_twsk_schedule(tw, &dccp_death_row, timeo,
- DCCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
#ifdef CONFIG_NET_FOU_IP_TUNNELS
-static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
+static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou_build_header,
};
-static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
+static const struct ip_tunnel_encap_ops gue_iptun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue_build_header,
};
int min_headroom;
int err;
- skb = udp_tunnel_handle_offloads(skb, csum);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
if (unlikely(!skb))
return -ENOMEM;
+ skb = udp_tunnel_handle_offloads(skb, csum);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
- s32 tmo;
+ long tmo;
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
nlmsg_flags);
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_ttd - inet_tw_time_stamp();
+ tmo = tw->tw_timer.expires - jiffies;
if (tmo < 0)
tmo = 0;
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
spin_unlock(&head->lock);
if (tw) {
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
while (twrefcnt) {
twrefcnt--;
inet_twsk_put(tw);
}
/* Must be called with locally disabled BHs. */
-static void __inet_twsk_kill(struct inet_timewait_sock *tw,
- struct inet_hashinfo *hashinfo)
+static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
+ struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
struct inet_bind_hashbucket *bhead;
int refcnt;
/* Unlink from established hashes. */
BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
atomic_sub(refcnt, &tw->tw_refcnt);
+ atomic_dec(&tw->tw_dr->tw_count);
+ inet_twsk_put(tw);
}
void inet_twsk_free(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
+/*
+ * Per-socket timewait timer callback; @data is the inet_timewait_sock.
+ * Accounts the expiry as TIMEWAITKILLED when tw_kill is set, otherwise
+ * as TIMEWAITED, then tears the socket down via inet_twsk_kill().
+ */
+void tw_timer_handler(unsigned long data)
{
- struct inet_timewait_sock *tw =
- kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
- GFP_ATOMIC);
+	struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+	const int mib = tw->tw_kill ? LINUX_MIB_TIMEWAITKILLED :
+				      LINUX_MIB_TIMEWAITED;
+
+	NET_INC_STATS_BH(twsk_net(tw), mib);
+	inet_twsk_kill(tw);
+}
+
+struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
+ struct inet_timewait_death_row *dr,
+ const int state)
+{
+ struct inet_timewait_sock *tw;
+
+ if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ return NULL;
+
+ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+ GFP_ATOMIC);
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
kmemcheck_annotate_bitfield(tw, flags);
+ tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
+ setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
* timewait socket.
*/
atomic_set(&tw->tw_refcnt, 0);
- inet_twsk_dead_node_init(tw);
+
__module_get(tw->tw_prot->owner);
}
}
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
-/* Returns non-zero if quota exceeded. */
-static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
- const int slot)
-{
- struct inet_timewait_sock *tw;
- unsigned int killed;
- int ret;
-
- /* NOTE: compare this to previous version where lock
- * was released after detaching chain. It was racy,
- * because tw buckets are scheduled in not serialized context
- * in 2.3 (with netfilter), and with softnet it is common, because
- * soft irqs are not sequenced.
- */
- killed = 0;
- ret = 0;
-rescan:
- inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
- __inet_twsk_del_dead_node(tw);
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
-#endif
- inet_twsk_put(tw);
- killed++;
- spin_lock(&twdr->death_lock);
- if (killed > INET_TWDR_TWKILL_QUOTA) {
- ret = 1;
- break;
- }
-
- /* While we dropped twdr->death_lock, another cpu may have
- * killed off the next TW bucket in the list, therefore
- * do a fresh re-read of the hlist head node with the
- * lock reacquired. We still use the hlist traversal
- * macro in order to get the prefetches.
- */
- goto rescan;
- }
-
- twdr->tw_count -= killed;
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
-#endif
- return ret;
-}
-
-void inet_twdr_hangman(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- unsigned int need_timer;
-
- twdr = (struct inet_timewait_death_row *)data;
- spin_lock(&twdr->death_lock);
-
- if (twdr->tw_count == 0)
- goto out;
-
- need_timer = 0;
- if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
- twdr->thread_slots |= (1 << twdr->slot);
- schedule_work(&twdr->twkill_work);
- need_timer = 1;
- } else {
- /* We purged the entire slot, anything left? */
- if (twdr->tw_count)
- need_timer = 1;
- twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
- }
- if (need_timer)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
-out:
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_hangman);
-
-void inet_twdr_twkill_work(struct work_struct *work)
-{
- struct inet_timewait_death_row *twdr =
- container_of(work, struct inet_timewait_death_row, twkill_work);
- int i;
-
- BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
- (sizeof(twdr->thread_slots) * 8));
-
- while (twdr->thread_slots) {
- spin_lock_bh(&twdr->death_lock);
- for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
- if (!(twdr->thread_slots & (1 << i)))
- continue;
-
- while (inet_twdr_do_twkill_work(twdr, i) != 0) {
- if (need_resched()) {
- spin_unlock_bh(&twdr->death_lock);
- schedule();
- spin_lock_bh(&twdr->death_lock);
- }
- }
-
- twdr->thread_slots &= ~(1 << i);
- }
- spin_unlock_bh(&twdr->death_lock);
- }
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
-
/* These are always called from BH context. See callers in
* tcp_input.c to verify this.
*/
/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr)
+void inet_twsk_deschedule(struct inet_timewait_sock *tw)
{
- spin_lock(&twdr->death_lock);
- if (inet_twsk_del_dead_node(tw)) {
- inet_twsk_put(tw);
- if (--twdr->tw_count == 0)
- del_timer(&twdr->tw_timer);
- }
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
+ if (del_timer_sync(&tw->tw_timer)) /* presumably nonzero only when a still-pending timer was cancelled - confirm against the timer API */
+ inet_twsk_kill(tw); /* does the teardown tw_timer_handler() would have done: drops tw_count and puts the socket */
}
EXPORT_SYMBOL(inet_twsk_deschedule);
-void inet_twsk_schedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr,
- const int timeo, const int timewait_len)
+void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
{
- struct hlist_head *list;
- int slot;
-
/* timeout := RTO * 3.5
*
* 3.5 = 1+2+0.5 to wait for two retransmits.
* is greater than TS tick!) and detect old duplicates with help
* of PAWS.
*/
- slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
- spin_lock(&twdr->death_lock);
-
- /* Unlink it, if it was scheduled */
- if (inet_twsk_del_dead_node(tw))
- twdr->tw_count--;
- else
+ tw->tw_kill = timeo <= 4*HZ;
+ if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) {
atomic_inc(&tw->tw_refcnt);
-
- if (slot >= INET_TWDR_RECYCLE_SLOTS) {
- /* Schedule to slow timer */
- if (timeo >= timewait_len) {
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- } else {
- slot = DIV_ROUND_UP(timeo, twdr->period);
- if (slot >= INET_TWDR_TWKILL_SLOTS)
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- }
- tw->tw_ttd = inet_tw_time_stamp() + timeo;
- slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
- list = &twdr->cells[slot];
- } else {
- tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK);
-
- if (twdr->twcal_hand < 0) {
- twdr->twcal_hand = 0;
- twdr->twcal_jiffie = jiffies;
- twdr->twcal_timer.expires = twdr->twcal_jiffie +
- (slot << INET_TWDR_RECYCLE_TICK);
- add_timer(&twdr->twcal_timer);
- } else {
- if (time_after(twdr->twcal_timer.expires,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
- mod_timer(&twdr->twcal_timer,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK));
- slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- list = &twdr->twcal_row[slot];
+ atomic_inc(&tw->tw_dr->tw_count);
}
-
- hlist_add_head(&tw->tw_death_node, list);
-
- if (twdr->tw_count++ == 0)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
- spin_unlock(&twdr->death_lock);
}
EXPORT_SYMBOL_GPL(inet_twsk_schedule);
-void inet_twdr_twcal_tick(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- int n, slot;
- unsigned long j;
- unsigned long now = jiffies;
- int killed = 0;
- int adv = 0;
-
- twdr = (struct inet_timewait_death_row *)data;
-
- spin_lock(&twdr->death_lock);
- if (twdr->twcal_hand < 0)
- goto out;
-
- slot = twdr->twcal_hand;
- j = twdr->twcal_jiffie;
-
- for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
- if (time_before_eq(j, now)) {
- struct hlist_node *safe;
- struct inet_timewait_sock *tw;
-
- inet_twsk_for_each_inmate_safe(tw, safe,
- &twdr->twcal_row[slot]) {
- __inet_twsk_del_dead_node(tw);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-#endif
- inet_twsk_put(tw);
- killed++;
- }
- } else {
- if (!adv) {
- adv = 1;
- twdr->twcal_jiffie = j;
- twdr->twcal_hand = slot;
- }
-
- if (!hlist_empty(&twdr->twcal_row[slot])) {
- mod_timer(&twdr->twcal_timer, j);
- goto out;
- }
- }
- j += 1 << INET_TWDR_RECYCLE_TICK;
- slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- twdr->twcal_hand = -1;
-
-out:
- if ((twdr->tw_count -= killed) == 0)
- del_timer(&twdr->tw_timer);
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
-#endif
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
-
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family)
{
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule(tw, twdr);
+ inet_twsk_deschedule(tw);
local_bh_enable();
inet_twsk_put(tw);
goto restart_rcu;
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- tcp_death_row.tw_count, sockets,
+ atomic_read(&tcp_death_row.tw_count), sockets,
proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
-#include <linux/aio.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
sg = !!(sk->sk_route_caps & NETIF_F_SG);
- while (iov_iter_count(&msg->msg_iter)) {
+ while (msg_data_left(msg)) {
int copy = 0;
int max = size_goal;
}
/* Try to append data to the end of skb. */
- if (copy > iov_iter_count(&msg->msg_iter))
- copy = iov_iter_count(&msg->msg_iter);
+ if (copy > msg_data_left(msg))
+ copy = msg_data_left(msg);
/* Where to copy to? */
if (skb_availroom(skb) > 0) {
tcp_skb_pcount_set(skb, 0);
copied += copy;
- if (!iov_iter_count(&msg->msg_iter)) {
+ if (!msg_data_left(msg)) {
tcp_tx_timestamp(sk, skb);
goto out;
}
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
- } else {
+ } else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
WARN_ON_ONCE(last_ackt.v64 == 0);
if (!first_ackt.v64)
first_ackt = last_ackt;
- if (!(sacked & TCPCB_SACKED_ACKED)) {
- reord = min(pkts_acked, reord);
- if (!after(scb->end_seq, tp->high_seq))
- flag |= FLAG_ORIG_SACK_ACKED;
- }
+ reord = min(pkts_acked, reord);
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_SACKED_ACKED)
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
struct seq_file *f, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
__be32 dest, src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
.hashinfo = &tcp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&tcp_death_row),
- .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&tcp_death_row),
};
EXPORT_SYMBOL_GPL(tcp_death_row);
if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_RST;
}
if (tcp_death_row.sysctl_tw_recycle &&
tcptw->tw_ts_recent_stamp &&
tcp_tw_remember_stamp(tw))
- inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, tw->tw_timeout);
else
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
*/
if (sysctl_tcp_rfc1337 == 0) {
kill:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
}
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
* Do not reschedule in the last case.
*/
if (paws_reject || th->ack)
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
*/
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_timewait_sock *tw;
bool recycle_ok = false;
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tcp_remember_stamp(sk);
- if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &tcp_death_row, state);
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
timeo = TCP_TIMEWAIT_LEN;
}
- inet_twsk_schedule(tw, &tcp_death_row, timeo,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
rcu_read_unlock();
#endif
+ /* Do not fool tcpdump (if any), clean our debris */
+ skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
static void vti6_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
if (dev == ip6n->fb_tnl_dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
static void get_timewait6_sock(struct seq_file *seq,
struct inet_timewait_sock *tw, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
const struct in6_addr *dest, *src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = &tw->tw_v6_daddr;
src = &tw->tw_v6_rcv_saddr;
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
- int peer_portid; /* PORTID of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
- int portid, struct user_namespace *user_ns)
+ u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
{
const struct nfulnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_portid;
+ u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
}
static struct nfqnl_instance *
-instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
- int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
};
static struct nfqnl_instance *
-verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
{
struct nfqnl_instance *queue;
{
const struct nfqnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
inst->queue_num,
inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
struct urelease_work {
struct work_struct w;
- int portid;
+ u32 portid;
};
static void nfc_urelease_event_work(struct work_struct *work)
rcu_read_lock();
conn = rds_conn_lookup(head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
- !is_outgoing) {
+ laddr == faddr && !is_outgoing) {
/* This is a looped back IB connection, and we're
* called by the code handling the incoming connect.
* We need a second connection object into which we
}
atomic_set(&conn->c_state, RDS_CONN_DOWN);
+ conn->c_send_gen = 0;
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
void *c_transport_data;
atomic_t c_state;
+ unsigned long c_send_gen;
unsigned long c_flags;
unsigned long c_reconnect_jiffies;
struct delayed_work c_send_w;
struct scatterlist *sg;
int ret = 0;
LIST_HEAD(to_be_dropped);
+ int batch_count;
+ unsigned long send_gen = 0;
restart:
+ batch_count = 0;
/*
* sendmsg calls here after having queued its message on the send
goto out;
}
+ /*
+ * we record the send generation after doing the xmit acquire.
+ * if someone else manages to jump in and do some work, we'll use
+ * this to avoid a goto restart farther down.
+ *
+ * The acquire_in_xmit() check above ensures that only one
+ * caller can increment c_send_gen at any time.
+ */
+ conn->c_send_gen++;
+ send_gen = conn->c_send_gen;
+
/*
* rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
* we do the opposite to avoid races.
if (!rm) {
unsigned int len;
+ batch_count++;
+
+ /* we want to process as big a batch as we can, but
+ * we also want to avoid softlockups. If we've been
+ * through a lot of messages, let's back off and see
+ * if anyone else jumps in
+ */
+ if (batch_count >= 1024)
+ goto over_batch;
+
spin_lock_irqsave(&conn->c_lock, flags);
if (!list_empty(&conn->c_send_queue)) {
}
}
+over_batch:
if (conn->c_trans->xmit_complete)
conn->c_trans->xmit_complete(conn);
-
release_in_xmit(conn);
/* Nuke any messages we decided not to retransmit. */
* If the transport cannot continue (i.e ret != 0), then it must
* call us when more room is available, such as from the tx
* completion handler.
+ *
+ * We have an extra generation check here so that if someone manages
+ * to jump in after our release_in_xmit, we'll see that they have done
+ * some work and we will skip our goto
*/
if (ret == 0) {
smp_mb();
- if (!list_empty(&conn->c_send_queue)) {
+ if (!list_empty(&conn->c_send_queue) &&
+ send_gen == conn->c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
goto restart;
}
const char *rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
- "?09", "?10", "?11", "?12", "?13", "?14", "?15"
+ "?09", "?10", "?11", "?12", "VERSION", "?14", "?15"
};
/*
rxrpc_queue_conn(conn);
}
+/*
+ * post endpoint-level events to the local endpoint
+ * - this includes debug and version messages
+ * - one reference on the endpoint is taken per packet queued; it is dropped
+ *   by rxrpc_process_local_events() once that packet has been dealt with
+ */
+static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", local, skb);
+
+ /* take the endpoint reference before the packet goes on the queue */
+ atomic_inc(&local->usage);
+ skb_queue_tail(&local->event_queue, skb);
+ /* schedule the work item that drains event_queue */
+ rxrpc_queue_work(&local->event_processor);
+}
+
static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
struct sk_buff *skb,
struct rxrpc_skb_priv *sp)
goto bad_message;
}
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+ rxrpc_post_packet_to_local(local, skb);
+ goto out;
+ }
+
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
else
goto cant_route_call;
}
+
+out:
rxrpc_put_local(local);
return;
struct work_struct destroyer; /* endpoint destroyer */
struct work_struct acceptor; /* incoming call processor */
struct work_struct rejecter; /* packet reject writer */
+ struct work_struct event_processor; /* endpoint event processor */
struct list_head services; /* services listening on this endpoint */
struct list_head link; /* link in endpoint list */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
+ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
atomic_t usage;
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#include <generated/utsrelease.h>
#include "ar-internal.h"
+static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
+
static LIST_HEAD(rxrpc_locals);
DEFINE_RWLOCK(rxrpc_local_lock);
static DECLARE_RWSEM(rxrpc_local_sem);
static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
static void rxrpc_destroy_local(struct work_struct *work);
+static void rxrpc_process_local_events(struct work_struct *work);
/*
* allocate a new local
INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+ INIT_WORK(&local->event_processor, &rxrpc_process_local_events);
INIT_LIST_HEAD(&local->services);
INIT_LIST_HEAD(&local->link);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->accept_queue);
skb_queue_head_init(&local->reject_queue);
+ skb_queue_head_init(&local->event_queue);
spin_lock_init(&local->lock);
rwlock_init(&local->services_lock);
atomic_set(&local->usage, 1);
ASSERT(list_empty(&local->services));
ASSERT(!work_pending(&local->acceptor));
ASSERT(!work_pending(&local->rejecter));
+ ASSERT(!work_pending(&local->event_processor));
/* finish cleaning up the local descriptor */
rxrpc_purge_queue(&local->accept_queue);
rxrpc_purge_queue(&local->reject_queue);
+ rxrpc_purge_queue(&local->event_queue);
kernel_sock_shutdown(local->socket, SHUT_RDWR);
sock_release(local->socket);
_leave("");
}
+
+/*
+ * Reply to a version request
+ * - the reply is addressed to the UDP source port and IP source address
+ *   found in the request packet (skb)
+ * - the request's header (*hdr) is rewritten in place and reused as the
+ *   header of the reply
+ * - a sendmsg failure is only reported through _debug(); nothing is returned
+ */
+static void rxrpc_send_version_request(struct rxrpc_local *local,
+				       struct rxrpc_header *hdr,
+				       struct sk_buff *skb)
+{
+	/* Zero-fill both structures so that members which are not assigned
+	 * explicitly (address padding, msg_iocb, ...) are never handed to the
+	 * socket layer with indeterminate stack contents.
+	 */
+	struct sockaddr_in sin = { .sin_family = AF_INET };
+	struct msghdr msg = {
+		.msg_name	= &sin,
+		.msg_namelen	= sizeof(sin),
+		.msg_control	= NULL,
+		.msg_controllen	= 0,
+		.msg_flags	= 0,
+	};
+	struct kvec iov[2];
+	size_t len;
+	int ret;
+
+	_enter("");
+
+	/* send the reply back to where the request came from */
+	sin.sin_port = udp_hdr(skb)->source;
+	sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+
+	hdr->seq	= 0;
+	hdr->serial	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_VERSION;
+	/* mark as last packet and invert the request's client-initiated bit */
+	hdr->flags	= RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	/* header followed by the whole fixed-size version string buffer */
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+	iov[1].iov_base	= (char *)rxrpc_version_string;
+	iov[1].iov_len	= sizeof(rxrpc_version_string);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	_proto("Tx VERSION (reply)");
+
+	ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
+	if (ret < 0)
+		_debug("sendmsg failed: %d", ret);
+
+	_leave("");
+}
+
+/*
+ * Process event packets targeted at a local endpoint.
+ *
+ * Drains local->event_queue.  Every queued packet carries one reference on
+ * the endpoint (taken in rxrpc_post_packet_to_local()); that reference and
+ * the packet itself are released here whatever the outcome of processing.
+ */
+static void rxrpc_process_local_events(struct work_struct *work)
+{
+	struct rxrpc_local *local = container_of(work, struct rxrpc_local, event_processor);
+	struct sk_buff *skb;
+	char v;
+
+	_enter("");
+
+	/* hold the endpoint for the duration of this run */
+	atomic_inc(&local->usage);
+
+	while ((skb = skb_dequeue(&local->event_queue))) {
+		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+		kdebug("{%d},{%u}", local->debug_id, sp->hdr.type);
+
+		switch (sp->hdr.type) {
+		case RXRPC_PACKET_TYPE_VERSION:
+			/* If the first payload byte cannot be copied out, just
+			 * drop this packet.  Returning from here would leak
+			 * the skb and both endpoint references, and would
+			 * leave the rest of the queue unprocessed.
+			 */
+			if (skb_copy_bits(skb, 0, &v, 1) < 0)
+				break;
+			_proto("Rx VERSION { %02x }", v);
+			if (v == 0)
+				rxrpc_send_version_request(local, &sp->hdr, skb);
+			break;
+
+		default:
+			/* Just ignore anything we don't understand */
+			break;
+		}
+
+		/* drop the per-packet reference, then the packet */
+		rxrpc_put_local(local);
+		rxrpc_free_skb(skb);
+	}
+
+	/* drop the reference taken at the top of this function */
+	rxrpc_put_local(local);
+	_leave("");
+}
call->tx_pending = NULL;
copied = 0;
- if (len > iov_iter_count(&msg->msg_iter))
- len = iov_iter_count(&msg->msg_iter);
- while (len) {
- int copy;
-
+ do {
if (!skb) {
size_t size, chunk, max, space;
max &= ~(call->conn->size_align - 1UL);
chunk = max;
- if (chunk > len && !more)
- chunk = len;
+ if (chunk > msg_data_left(msg) && !more)
+ chunk = msg_data_left(msg);
space = chunk + call->conn->size_align;
space &= ~(call->conn->size_align - 1UL);
sp = rxrpc_skb(skb);
/* append next segment of data to the current buffer */
- copy = skb_tailroom(skb);
- ASSERTCMP(copy, >, 0);
- if (copy > len)
- copy = len;
- if (copy > sp->remain)
- copy = sp->remain;
-
- _debug("add");
- ret = skb_add_data(skb, &msg->msg_iter, copy);
- _debug("added");
- if (ret < 0)
- goto efault;
- sp->remain -= copy;
- skb->mark += copy;
- copied += copy;
-
- len -= copy;
+ if (msg_data_left(msg) > 0) {
+ int copy = skb_tailroom(skb);
+ ASSERTCMP(copy, >, 0);
+ if (copy > msg_data_left(msg))
+ copy = msg_data_left(msg);
+ if (copy > sp->remain)
+ copy = sp->remain;
+
+ _debug("add");
+ ret = skb_add_data(skb, &msg->msg_iter, copy);
+ _debug("added");
+ if (ret < 0)
+ goto efault;
+ sp->remain -= copy;
+ skb->mark += copy;
+ copied += copy;
+ }
/* check for the far side aborting the call or a network error
* occurring */
goto call_aborted;
/* add the packet to the send queue if it's now full */
- if (sp->remain <= 0 || (!len && !more)) {
+ if (sp->remain <= 0 ||
+ (msg_data_left(msg) == 0 && !more)) {
struct rxrpc_connection *conn = call->conn;
uint32_t seq;
size_t pad;
sp->hdr.serviceId = conn->service_id;
sp->hdr.flags = conn->out_clientflag;
- if (len == 0 && !more)
+ if (msg_data_left(msg) == 0 && !more)
sp->hdr.flags |= RXRPC_LAST_PACKET;
else if (CIRC_SPACE(call->acks_head, call->acks_tail,
call->acks_winsz) > 1)
memcpy(skb->head, &sp->hdr,
sizeof(struct rxrpc_header));
- rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more);
+ rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
skb = NULL;
}
- }
+ } while (msg_data_left(msg) > 0);
success:
ret = copied;
/* ------------------------------------------------------------- */
+/*
+ * Qdisc ->init hook for the ingress qdisc.
+ * Calls net_inc_ingress_queue(); ingress_destroy() makes the balancing
+ * net_dec_ingress_queue() call.  Neither argument is used and the function
+ * always returns 0.
+ */
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ net_inc_ingress_queue();
+
+ return 0;
+}
+
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
tcf_destroy_chain(&p->filter_list);
+ net_dec_ingress_queue();
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
.id = "ingress",
.priv_size = sizeof(struct ingress_qdisc_data),
.enqueue = ingress_enqueue,
+ .init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
-deliver:
qdisc_qstats_backlog_dec(sch, skb);
+deliver:
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
rb_erase(p, &q->t_root);
sch->q.qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
skb->tstamp = netem_skb_cb(skb)->tstamp_save;
}
EXPORT_SYMBOL(__sock_tx_timestamp);
-static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
- size_t size)
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- return sock->ops->sendmsg(sock, msg, size);
+ int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
+ BUG_ON(ret == -EIOCBQUEUED);
+ return ret;
}
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
- int err = security_socket_sendmsg(sock, msg, size);
+ int err = security_socket_sendmsg(sock, msg,
+ msg_data_left(msg));
- return err ?: sock_sendmsg_nosec(sock, msg, size);
+ return err ?: sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
- mm_segment_t oldfs = get_fs();
- int result;
-
- set_fs(KERNEL_DS);
- /*
- * the following is safe, since for compiler definitions of kvec and
- * iovec are identical, yielding the same in-core layout and alignment
- */
- iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
- result = sock_sendmsg(sock, msg, size);
- set_fs(oldfs);
- return result;
+ iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);
mm_segment_t oldfs = get_fs();
int result;
+ iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
set_fs(KERNEL_DS);
- /*
- * the following is safe, since for compiler definitions of kvec and
- * iovec are identical, yielding the same in-core layout and alignment
- */
- iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
if (iocb->ki_pos != 0)
return -ESPIPE;
- if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
+ if (!iov_iter_count(to)) /* Match SYS5 behaviour */
return 0;
- res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags);
+ res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
*to = msg.msg_iter;
return res;
}
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = sock_sendmsg(sock, &msg, iocb->ki_nbytes);
+ res = sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
struct iovec iov;
int fput_needed;
- if (len > INT_MAX)
- len = INT_MAX;
- if (unlikely(!access_ok(VERIFY_READ, buff, len)))
- return -EFAULT;
+ err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
+ if (unlikely(err))
+ return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- iov.iov_base = buff;
- iov.iov_len = len;
msg.msg_name = NULL;
- iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg, len);
+ err = sock_sendmsg(sock, &msg);
out_put:
fput_light(sock->file, fput_needed);
int err, err2;
int fput_needed;
- if (size > INT_MAX)
- size = INT_MAX;
- if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
- return -EFAULT;
+ err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
+ if (unlikely(err))
+ return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- iov.iov_len = size;
- iov.iov_base = ubuf;
- iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
/* Save some cycles and don't copy the address if not needed */
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
- err = sock_recvmsg(sock, &msg, size, flags);
+ err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
if (err >= 0 && addr != NULL) {
err2 = move_addr_to_user(&address,
unsigned int name_len;
};
-static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
- struct user_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
{
struct sockaddr __user *uaddr;
struct iovec __user *uiov;
kmsg->msg_iocb = NULL;
- err = rw_copy_check_uvector(save_addr ? READ : WRITE,
- uiov, nr_segs,
- UIO_FASTIOV, *iov, iov);
- if (err >= 0)
- iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
- *iov, nr_segs, err);
- return err;
+ return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
}
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
__attribute__ ((aligned(sizeof(__kernel_size_t))));
/* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl;
- int ctl_len, total_len;
+ int ctl_len;
ssize_t err;
msg_sys->msg_name = &address;
else
err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
if (err < 0)
- goto out_freeiov;
- total_len = err;
+ return err;
err = -ENOBUFS;
used_address->name_len == msg_sys->msg_namelen &&
!memcmp(&used_address->name, msg_sys->msg_name,
used_address->name_len)) {
- err = sock_sendmsg_nosec(sock, msg_sys, total_len);
+ err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys, total_len);
+ err = sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
if (ctl_buf != ctl)
sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out_freeiov:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
return err;
}
else
err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
if (err < 0)
- goto out_freeiov;
- total_len = err;
+ return err;
+ total_len = iov_iter_count(&msg_sys->msg_iter);
cmsg_ptr = (unsigned long)msg_sys->msg_control;
msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
err = len;
out_freeiov:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
return err;
}
svc_set_cmsg_data(rqstp, cmh);
- if (sock_sendmsg(sock, &msg, 0) < 0)
+ if (sock_sendmsg(sock, &msg) < 0)
goto out;
}
skb->sp->xvec[skb->sp->len++] = x;
- if (xfrm_tunnel_check(skb, x, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
-
spin_lock(&x->lock);
if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
spin_unlock(&x->lock);
+ if (xfrm_tunnel_check(skb, x, family)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
+ goto drop;
+ }
+
seq_hi = htonl(xfrm_replay_seqhi(x, seq));
XFRM_SKB_CB(skb)->seq.input.low = seq;
switch (sclass) {
case SECCLASS_NETLINK_ROUTE_SOCKET:
+ /* RTM_MAX always points to RTM_SETxxxx, i.e. RTM_NEWxxx + 3 */
+ BUILD_BUG_ON(RTM_MAX != (RTM_NEWNSID + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
sizeof(nlmsg_route_perms));
break;
break;
case SECCLASS_NETLINK_XFRM_SOCKET:
+ BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_MAPPING);
err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms,
sizeof(nlmsg_xfrm_perms));
break;
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/pm_qos.h>
-#include <linux/aio.h>
#include <linux/io.h>
#include <linux/dma-mapping.h>
#include <sound/core.h>
#include <sound/pcm_params.h>
#include <sound/timer.h>
#include <sound/minors.h>
+#include <linux/uio.h>
/*
* Compatibility