@@ -901,6 +901,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
else
ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
&zonefs_write_dio_ops, 0, 0);
+
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret > 0)
@@ -1189,6 +1190,171 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
return 0;
}
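+
+/*
+ * Check that the copy does not read beyond the source file EOF and that
+ * the destination file size can grow to the copy end offset.
+ */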
+static int zonefs_is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
+ loff_t src_off, loff_t dst_off, size_t len)
+{
+ loff_t size, endoff;
+
+ size = i_size_read(src_inode);
+ /* Don't copy beyond source file EOF. */
+ if (src_off + len > size) {
+		zonefs_err(src_inode->i_sb, "Copy beyond EOF (%lld + %zu > %lld)\n",
+			   src_off, len, size);
+ return -EOPNOTSUPP;
+ }
+
+ endoff = dst_off + len;
+ if (inode_newsize_ok(dst_inode, endoff))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
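+
+/*
+ * Issue a single-range copy offload request through blkdev_issue_copy().
+ * On a partial copy, return the number of bytes actually copied (comp_len)
+ * so that the caller can complete the request with a splice copy.
+ */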
+static ssize_t __zonefs_send_copy(struct zonefs_inode_info *src_zi, loff_t src_off,
+ struct zonefs_inode_info *dst_zi, loff_t dst_off, size_t len)
+{
+ struct block_device *src_bdev = src_zi->i_vnode.i_sb->s_bdev;
+ struct block_device *dst_bdev = dst_zi->i_vnode.i_sb->s_bdev;
+ struct range_entry *rlist;
+	int ret;
+
+	rlist = kmalloc(sizeof(*rlist), GFP_KERNEL);
+	if (!rlist)
+		return -ENOMEM;
+
+	rlist[0].dst = (dst_zi->i_zsector << SECTOR_SHIFT) + dst_off;
+	rlist[0].src = (src_zi->i_zsector << SECTOR_SHIFT) + src_off;
+	rlist[0].len = len;
+	rlist[0].comp_len = 0;
+	ret = blkdev_issue_copy(src_bdev, 1, rlist, dst_bdev, GFP_KERNEL);
+	if (ret && rlist[0].comp_len != len) {
+		ret = rlist[0].comp_len;
+		kfree(rlist);
+		return ret;
+	}
+
+ kfree(rlist);
+ return len;
+}
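+
+/*
+ * Try to offload a copy between two zonefs files of the same device.
+ * Return -EOPNOTSUPP when the copy cannot be offloaded so that the caller
+ * falls back to a generic copy.
+ */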
+static ssize_t __zonefs_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ struct inode *src_inode = file_inode(src_file);
+ struct inode *dst_inode = file_inode(dst_file);
+ struct zonefs_inode_info *src_zi = ZONEFS_I(src_inode);
+ struct zonefs_inode_info *dst_zi = ZONEFS_I(dst_inode);
+ struct block_device *src_bdev = src_inode->i_sb->s_bdev;
+ struct block_device *dst_bdev = dst_inode->i_sb->s_bdev;
+ struct super_block *src_sb = src_inode->i_sb;
+ struct zonefs_sb_info *src_sbi = ZONEFS_SB(src_sb);
+ struct super_block *dst_sb = dst_inode->i_sb;
+ struct zonefs_sb_info *dst_sbi = ZONEFS_SB(dst_sb);
+ ssize_t ret = -EIO, bytes;
+
+ if (src_bdev != dst_bdev) {
+		zonefs_err(src_sb, "Cannot copy across devices\n");
+ return -EXDEV;
+ }
+
+	/*
+	 * Some of the checks below return -EOPNOTSUPP, which forces a
+	 * fallback to a generic copy.
+	 */
+	if (!(src_sbi->s_mount_opts & ZONEFS_MNTOPT_COPY_FILE) ||
+	    !(dst_sbi->s_mount_opts & ZONEFS_MNTOPT_COPY_FILE))
+		return -EOPNOTSUPP;
+
+	/* Start by sync'ing the source and destination files for conv zones */
+ if (src_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+ ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
+ if (ret < 0) {
+ zonefs_err(src_sb, "failed to write source file (%zd)\n", ret);
+ goto out;
+ }
+ }
+ if (dst_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+ ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
+ if (ret < 0) {
+ zonefs_err(dst_sb, "failed to write destination file (%zd)\n", ret);
+ goto out;
+ }
+ }
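+
+	/*
+	 * The offloaded copy must land exactly on the destination zone write
+	 * pointer and cannot exceed the remaining zone space.
+	 */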
+	mutex_lock(&dst_zi->i_truncate_mutex);
+	if (len > dst_zi->i_max_size - dst_zi->i_wpoffset) {
+		/* Adjust the length to the remaining zone space */
+		len = dst_zi->i_max_size - dst_zi->i_wpoffset;
+		if (!len) {
+			mutex_unlock(&dst_zi->i_truncate_mutex);
+			return -EOPNOTSUPP;
+		}
+	}
+ if (dst_off != dst_zi->i_wpoffset) {
+ mutex_unlock(&dst_zi->i_truncate_mutex);
+ return -EOPNOTSUPP; /* copy not at zone write ptr */
+ }
+ mutex_lock(&src_zi->i_truncate_mutex);
+ ret = zonefs_is_file_size_ok(src_inode, dst_inode, src_off, dst_off, len);
+ if (ret < 0) {
+ mutex_unlock(&src_zi->i_truncate_mutex);
+ mutex_unlock(&dst_zi->i_truncate_mutex);
+ goto out;
+ }
+ mutex_unlock(&src_zi->i_truncate_mutex);
+
+	/* Drop the dst file cached pages for a conv zone */
+ if (dst_zi->i_ztype == ZONEFS_ZTYPE_CNV) {
+ ret = invalidate_inode_pages2_range(dst_inode->i_mapping,
+ dst_off >> PAGE_SHIFT,
+ (dst_off + len) >> PAGE_SHIFT);
+ if (ret < 0) {
+ zonefs_err(dst_sb, "Failed to invalidate inode pages (%zd)\n", ret);
+ ret = 0;
+ }
+ }
+	bytes = __zonefs_send_copy(src_zi, src_off, dst_zi, dst_off, len);
+	if (bytes < 0) {
+		mutex_unlock(&dst_zi->i_truncate_mutex);
+		ret = bytes;
+		goto out;
+	}
+	ret += bytes;
+
+	file_update_time(dst_file);
+	zonefs_update_stats(dst_inode, dst_off + bytes);
+	zonefs_i_size_write(dst_inode, dst_off + bytes);
+	dst_zi->i_wpoffset += bytes;
+	mutex_unlock(&dst_zi->i_truncate_mutex);
+
+	/*
+	 * If the offloaded copy was partial, copy the remaining bytes with
+	 * a splice copy.
+	 */
+	if (bytes && (bytes < len)) {
+		size_t remaining = len - bytes;
+
+		zonefs_info(src_sb, "Final partial copy of %zu bytes\n",
+			    remaining);
+		src_off += bytes;
+		dst_off += bytes;
+		bytes = do_splice_direct(src_file, &src_off, dst_file,
+					 &dst_off, remaining, flags);
+		if (bytes > 0)
+			ret += bytes;
+		else
+			zonefs_info(src_sb, "Failed partial copy (%zd)\n", bytes);
+	}
+
+out:
+	return ret;
+}
+
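+/*
+ * copy_file_range file operation: try the offloaded copy first and fall
+ * back to the VFS generic copy when the offload is not possible.
+ */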
+static ssize_t zonefs_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __zonefs_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
static const struct file_operations zonefs_file_operations = {
.open = zonefs_file_open,
.release = zonefs_file_release,
@@ -1200,6 +1366,7 @@ static const struct file_operations zonefs_file_operations = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.iopoll = iocb_bio_iopoll,
+ .copy_file_range = zonefs_copy_file_range,
};
static struct kmem_cache *zonefs_inode_cachep;
@@ -1262,7 +1429,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
enum {
Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
- Opt_explicit_open, Opt_err,
+ Opt_explicit_open, Opt_no_copy_offload, Opt_err,
};
static const match_table_t tokens = {
@@ -1271,6 +1438,7 @@ static const match_table_t tokens = {
{ Opt_errors_zol, "errors=zone-offline"},
{ Opt_errors_repair, "errors=repair"},
{ Opt_explicit_open, "explicit-open" },
+ { Opt_no_copy_offload, "no_copy_offload" },
{ Opt_err, NULL}
};
@@ -1280,6 +1448,7 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
substring_t args[MAX_OPT_ARGS];
char *p;
+	/* Copy offload is enabled by default; no_copy_offload clears it */
+	sbi->s_mount_opts |= ZONEFS_MNTOPT_COPY_FILE;
if (!options)
return 0;
@@ -1310,6 +1479,9 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
case Opt_explicit_open:
sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
break;
+ case Opt_no_copy_offload:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_COPY_FILE;
+ break;
default:
return -EINVAL;
}
@@ -1330,6 +1502,8 @@ static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",errors=zone-offline");
if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
seq_puts(seq, ",errors=repair");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_COPY_FILE)
+ seq_puts(seq, ",copy_offload");
return 0;
}
@@ -1769,6 +1943,8 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
atomic_set(&sbi->s_active_seq_files, 0);
sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
+	/* Enable copy offload support by default */
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_COPY_FILE;
ret = zonefs_read_super(sb);
if (ret)
return ret;
@@ -162,6 +162,7 @@ enum zonefs_features {
(ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \
ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR)
#define ZONEFS_MNTOPT_EXPLICIT_OPEN (1 << 4) /* Explicit open/close of zones on open/close */
+#define ZONEFS_MNTOPT_COPY_FILE	(1 << 5) /* enable copy_file_range() offload to the device */
/*
* In-memory Super block information.