@@ -10,6 +10,7 @@
#include <uapi/linux/uio.h>
struct page;
+struct address_space;
struct pipe_inode_info;
struct kvec {
@@ -24,6 +25,7 @@ enum iter_type {
ITER_BVEC = 16,
ITER_PIPE = 32,
ITER_DISCARD = 64,
+ ITER_XARRAY = 128,
};
struct iov_iter {
@@ -39,6 +41,7 @@ struct iov_iter {
const struct iovec *iov;
const struct kvec *kvec;
const struct bio_vec *bvec;
+ struct xarray *xarray;
struct pipe_inode_info *pipe;
};
union {
@@ -47,6 +50,7 @@ struct iov_iter {
unsigned int head;
unsigned int start_head;
};
+ loff_t xarray_start;
};
};
@@ -80,6 +84,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i)
return iov_iter_type(i) == ITER_DISCARD;
}
+static inline bool iov_iter_is_xarray(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_XARRAY;
+}
+
static inline unsigned char iov_iter_rw(const struct iov_iter *i)
{
return i->type & (READ | WRITE);
@@ -221,6 +230,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_
void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
size_t count);
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
+void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
+ loff_t start, size_t count);
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@ -76,7 +76,44 @@
} \
}
-#define iterate_all_kinds(i, n, v, I, B, K) { \
+#define iterate_xarray(i, n, __v, skip, STEP) { \
+ struct page *head = NULL; \
+ size_t wanted = n, seg, offset; \
+ loff_t start = i->xarray_start + skip; \
+ pgoff_t index = start >> PAGE_SHIFT; \
+ int j; \
+ \
+ XA_STATE(xas, i->xarray, index); \
+ \
+ rcu_read_lock(); \
+ xas_for_each(&xas, head, ULONG_MAX) { \
+ if (xas_retry(&xas, head)) \
+ continue; \
+ if (WARN_ON(xa_is_value(head))) \
+ break; \
+ if (WARN_ON(PageHuge(head))) \
+ break; \
+ for (j = (head->index < index) ? index - head->index : 0; \
+ j < thp_nr_pages(head); j++) { \
+ __v.bv_page = head + j; \
+ offset = (i->xarray_start + skip) & ~PAGE_MASK; \
+ seg = PAGE_SIZE - offset; \
+ __v.bv_offset = offset; \
+ __v.bv_len = min(n, seg); \
+ (void)(STEP); \
+ n -= __v.bv_len; \
+ skip += __v.bv_len; \
+ if (n == 0) \
+ break; \
+ } \
+ if (n == 0) \
+ break; \
+ } \
+ rcu_read_unlock(); \
+ n = wanted - n; \
+}
+
+#define iterate_all_kinds(i, n, v, I, B, K, X) { \
if (likely(n)) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
@@ -88,6 +125,9 @@
struct kvec v; \
iterate_kvec(i, n, v, kvec, skip, (K)) \
} else if (unlikely(i->type & ITER_DISCARD)) { \
+ } else if (unlikely(i->type & ITER_XARRAY)) { \
+ struct bio_vec v; \
+ iterate_xarray(i, n, v, skip, (X)); \
} else { \
const struct iovec *iov; \
struct iovec v; \
@@ -96,7 +136,7 @@
} \
}
-#define iterate_and_advance(i, n, v, I, B, K) { \
+#define iterate_and_advance(i, n, v, I, B, K, X) { \
if (unlikely(i->count < n)) \
n = i->count; \
if (i->count) { \
@@ -121,6 +161,9 @@
i->kvec = kvec; \
} else if (unlikely(i->type & ITER_DISCARD)) { \
skip += n; \
+ } else if (unlikely(i->type & ITER_XARRAY)) { \
+ struct bio_vec v; \
+ iterate_xarray(i, n, v, skip, (X)) \
} else { \
const struct iovec *iov; \
struct iovec v; \
@@ -622,7 +665,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
memcpy_to_page(v.bv_page, v.bv_offset,
(from += v.bv_len) - v.bv_len, v.bv_len),
- memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
+ memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ memcpy_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len)
)
return bytes;
@@ -738,6 +783,16 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
bytes = curr_addr - s_addr - rem;
return bytes;
}
+ }),
+ ({
+ rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
+ if (rem) {
+ curr_addr = (unsigned long) from;
+ bytes = curr_addr - s_addr - rem;
+ rcu_read_unlock();
+ return bytes;
+ }
})
)
@@ -759,7 +814,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+ memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
return bytes;
@@ -785,7 +842,9 @@ bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
0;}),
memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+ memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
iov_iter_advance(i, bytes);
@@ -805,7 +864,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
v.iov_base, v.iov_len),
memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+ memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
return bytes;
@@ -840,7 +901,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
- v.iov_len)
+ v.iov_len),
+ memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
return bytes;
@@ -864,7 +927,9 @@ bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
0;}),
memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+ memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
iov_iter_advance(i, bytes);
@@ -901,7 +966,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
{
if (unlikely(!page_copy_sane(page, offset, bytes)))
return 0;
- if (i->type & (ITER_BVEC|ITER_KVEC)) {
+ if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
void *kaddr = kmap_atomic(page);
size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
@@ -924,7 +989,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
WARN_ON(1);
return 0;
}
- if (i->type & (ITER_BVEC|ITER_KVEC)) {
+ if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
void *kaddr = kmap_atomic(page);
size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
@@ -968,7 +1033,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
iterate_and_advance(i, bytes, v,
clear_user(v.iov_base, v.iov_len),
memzero_page(v.bv_page, v.bv_offset, v.bv_len),
- memset(v.iov_base, 0, v.iov_len)
+ memset(v.iov_base, 0, v.iov_len),
+ memzero_page(v.bv_page, v.bv_offset, v.bv_len)
)
return bytes;
@@ -992,7 +1058,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
- memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+ memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
+ v.bv_offset, v.bv_len)
)
kunmap_atomic(kaddr);
return bytes;
@@ -1078,11 +1146,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
i->count -= size;
return;
}
+ if (unlikely(iov_iter_is_xarray(i))) {
+ i->iov_offset += size;
+ i->count -= size;
+ return;
+ }
if (iov_iter_is_bvec(i)) {
iov_iter_bvec_advance(i, size);
return;
}
- iterate_and_advance(i, size, v, 0, 0, 0)
+ iterate_and_advance(i, size, v, 0, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
@@ -1126,7 +1199,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
return;
}
unroll -= i->iov_offset;
- if (iov_iter_is_bvec(i)) {
+ if (iov_iter_is_xarray(i)) {
+ BUG(); /* We should never go beyond the start of the specified
+ * range since we might then be straying into pages that
+ * aren't pinned.
+ */
+ } else if (iov_iter_is_bvec(i)) {
const struct bio_vec *bvec = i->bvec;
while (1) {
size_t n = (--bvec)->bv_len;
@@ -1163,9 +1241,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
return i->count; // it is a silly place, anyway
if (i->nr_segs == 1)
return i->count;
- if (unlikely(iov_iter_is_discard(i)))
+ if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
return i->count;
- else if (iov_iter_is_bvec(i))
+ if (iov_iter_is_bvec(i))
return min(i->count, i->bvec->bv_len - i->iov_offset);
else
return min(i->count, i->iov->iov_len - i->iov_offset);
@@ -1213,6 +1291,31 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_pipe);
+/**
+ * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @xarray: The xarray to access.
+ * @start: The start file position.
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator to either draw data out of the pages attached to an
+ * inode or to inject data into those pages. The pages *must* be prevented
+ * from evaporation, either by taking a ref on them or locking them by the
+ * caller.
+ */
+void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
+ struct xarray *xarray, loff_t start, size_t count)
+{
+ BUG_ON(direction & ~1);
+ i->type = ITER_XARRAY | (direction & (READ | WRITE));
+ i->xarray = xarray;
+ i->xarray_start = start;
+ i->count = count;
+ i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_xarray);
+
/**
* iov_iter_discard - Initialise an I/O iterator that discards data
* @i: The iterator to initialise.
@@ -1246,7 +1349,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
iterate_all_kinds(i, size, v,
(res |= (unsigned long)v.iov_base | v.iov_len, 0),
res |= v.bv_offset | v.bv_len,
- res |= (unsigned long)v.iov_base | v.iov_len
+ res |= (unsigned long)v.iov_base | v.iov_len,
+ res |= v.bv_offset | v.bv_len
)
return res;
}
@@ -1268,7 +1372,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
(size != v.bv_len ? size : 0)),
(res |= (!res ? 0 : (unsigned long)v.iov_base) |
- (size != v.iov_len ? size : 0))
+ (size != v.iov_len ? size : 0)),
+ (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
+ (size != v.bv_len ? size : 0))
);
return res;
}
@@ -1318,6 +1424,75 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}
+static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
+ pgoff_t index, unsigned int nr_pages)
+{
+ XA_STATE(xas, xa, index);
+ struct page *page;
+ unsigned int ret = 0;
+
+ rcu_read_lock();
+ for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+ if (xas_retry(&xas, page))
+ continue;
+
+ /* Has the page moved or been split? */
+ if (unlikely(page != xas_reload(&xas))) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ pages[ret] = find_subpage(page, xas.xa_index);
+ get_page(pages[ret]);
+ if (++ret == nr_pages)
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static ssize_t iter_xarray_get_pages(struct iov_iter *i,
+ struct page **pages, size_t maxsize,
+ unsigned maxpages, size_t *_start_offset)
+{
+ unsigned nr, offset;
+ pgoff_t index, count;
+ size_t size = maxsize, actual;
+ loff_t pos;
+
+ if (!size || !maxpages)
+ return 0;
+
+ pos = i->xarray_start + i->iov_offset;
+ index = pos >> PAGE_SHIFT;
+ offset = pos & ~PAGE_MASK;
+ *_start_offset = offset;
+
+ count = 1;
+ if (size > PAGE_SIZE - offset) {
+ size -= PAGE_SIZE - offset;
+ count += size >> PAGE_SHIFT;
+ size &= ~PAGE_MASK;
+ if (size)
+ count++;
+ }
+
+ if (count > maxpages)
+ count = maxpages;
+
+ nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
+ if (nr == 0)
+ return 0;
+
+ actual = PAGE_SIZE * nr;
+ actual -= offset;
+ if (nr == count && size > 0) {
+ unsigned last_offset = (nr > 1) ? 0 : offset;
+ actual -= PAGE_SIZE - (last_offset + size);
+ }
+ return actual;
+}
+
ssize_t iov_iter_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
@@ -1327,6 +1502,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (unlikely(iov_iter_is_pipe(i)))
return pipe_get_pages(i, pages, maxsize, maxpages, start);
+ if (unlikely(iov_iter_is_xarray(i)))
+ return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
if (unlikely(iov_iter_is_discard(i)))
return -EFAULT;
@@ -1353,7 +1530,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
return v.bv_len;
}),({
return -EFAULT;
- })
+ }),
+ 0
)
return 0;
}
@@ -1397,6 +1575,51 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
return n;
}
+static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
+ struct page ***pages, size_t maxsize,
+ size_t *_start_offset)
+{
+ struct page **p;
+ unsigned nr, offset;
+ pgoff_t index, count;
+ size_t size = maxsize, actual;
+ loff_t pos;
+
+ if (!size)
+ return 0;
+
+ pos = i->xarray_start + i->iov_offset;
+ index = pos >> PAGE_SHIFT;
+ offset = pos & ~PAGE_MASK;
+ *_start_offset = offset;
+
+ count = 1;
+ if (size > PAGE_SIZE - offset) {
+ size -= PAGE_SIZE - offset;
+ count += size >> PAGE_SHIFT;
+ size &= ~PAGE_MASK;
+ if (size)
+ count++;
+ }
+
+ p = get_pages_array(count);
+ if (!p)
+ return -ENOMEM;
+ *pages = p;
+
+ nr = iter_xarray_populate_pages(p, i->xarray, index, count);
+ if (nr == 0)
+ return 0;
+
+ actual = PAGE_SIZE * nr;
+ actual -= offset;
+ if (nr == count && size > 0) {
+ unsigned last_offset = (nr > 1) ? 0 : offset;
+ actual -= PAGE_SIZE - (last_offset + size);
+ }
+ return actual;
+}
+
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
struct page ***pages, size_t maxsize,
size_t *start)
@@ -1408,6 +1631,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
if (unlikely(iov_iter_is_pipe(i)))
return pipe_get_pages_alloc(i, pages, maxsize, start);
+ if (unlikely(iov_iter_is_xarray(i)))
+ return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
if (unlikely(iov_iter_is_discard(i)))
return -EFAULT;
@@ -1440,7 +1665,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
return v.bv_len;
}),({
return -EFAULT;
- })
+ }), 0
)
return 0;
}
@@ -1478,6 +1703,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
v.iov_base, v.iov_len,
sum, off);
off += v.iov_len;
+ }), ({
+ char *p = kmap_atomic(v.bv_page);
+ sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
+ p + v.bv_offset, v.bv_len,
+ sum, off);
+ kunmap_atomic(p);
+ off += v.bv_len;
})
)
*csum = sum;
@@ -1519,6 +1751,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
v.iov_base, v.iov_len,
sum, off);
off += v.iov_len;
+ }), ({
+ char *p = kmap_atomic(v.bv_page);
+ sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
+ p + v.bv_offset, v.bv_len,
+ sum, off);
+ kunmap_atomic(p);
+ off += v.bv_len;
})
)
*csum = sum;
@@ -1565,6 +1804,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
(from += v.iov_len) - v.iov_len,
v.iov_len, sum, off);
off += v.iov_len;
+ }), ({
+ char *p = kmap_atomic(v.bv_page);
+ sum = csum_and_memcpy(p + v.bv_offset,
+ (from += v.bv_len) - v.bv_len,
+ v.bv_len, sum, off);
+ kunmap_atomic(p);
+ off += v.bv_len;
})
)
csstate->csum = sum;
@@ -1615,6 +1861,21 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
if (npages >= maxpages)
return maxpages;
+ } else if (unlikely(iov_iter_is_xarray(i))) {
+ unsigned offset;
+
+ offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
+
+ npages = 1;
+ if (size > PAGE_SIZE - offset) {
+ size -= PAGE_SIZE - offset;
+ npages += size >> PAGE_SHIFT;
+ size &= ~PAGE_MASK;
+ if (size)
+ npages++;
+ }
+ if (npages >= maxpages)
+ return maxpages;
} else iterate_all_kinds(i, size, v, ({
unsigned long p = (unsigned long)v.iov_base;
npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
@@ -1631,7 +1892,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
- p / PAGE_SIZE;
if (npages >= maxpages)
return maxpages;
- })
+ }),
+ 0
)
return npages;
}
@@ -1644,7 +1906,7 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
WARN_ON(1);
return NULL;
}
- if (unlikely(iov_iter_is_discard(new)))
+ if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
return NULL;
if (iov_iter_is_bvec(new))
return new->bvec = kmemdup(new->bvec,
@@ -1849,7 +2111,12 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
kunmap(v.bv_page);
err;}), ({
w = v;
- err = f(&w, context);})
+ err = f(&w, context);}), ({
+ w.iov_base = kmap(v.bv_page) + v.bv_offset;
+ w.iov_len = v.bv_len;
+ err = f(&w, context);
+ kunmap(v.bv_page);
+ err;})
)
return err;
}