Compare commits

...

2 Commits

Author   SHA1         Message          Date
Ylarod   e23f15aeb6   update           2023-11-14 11:58:21 +08:00
Ylarod   ae59b080ae   ksufs 5.10 test  2023-11-14 11:21:59 +08:00
16 changed files with 11485 additions and 0 deletions


@@ -12,6 +12,7 @@ obj-y += embed_ksud.o
obj-y += kernel_compat.o
obj-y += selinux/
obj-y += ksufs/
# .git is a text file when the module is imported via 'git submodule add'.
ifeq ($(shell test -e $(srctree)/$(src)/../.git; echo $$?),0)
KSU_GIT_VERSION := $(shell cd $(srctree)/$(src); /usr/bin/env PATH="$$PATH":/usr/bin:/usr/local/bin git rev-list --count HEAD)

kernel/ksufs/5.10/Kconfig (new file, 126 lines)

@@ -0,0 +1,126 @@
# SPDX-License-Identifier: GPL-2.0-only
config OVERLAY_FS
tristate "Overlay filesystem support"
select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
and a 'lower' filesystem. When a name exists in both filesystems, the
object in the 'upper' filesystem is visible while the object in the
'lower' filesystem is either hidden or, in the case of directories,
merged with the 'upper' object.
For more information see Documentation/filesystems/overlayfs.rst
config OVERLAY_FS_REDIRECT_DIR
bool "Overlayfs: turn on redirect directory feature by default"
depends on OVERLAY_FS
help
If this config option is enabled then overlay filesystems will use
redirects when renaming directories by default. In this case it is
still possible to turn off redirects globally with the
"redirect_dir=off" module option or on a filesystem instance basis
with the "redirect_dir=off" mount option.
Note, that redirects are not backward compatible. That is, mounting
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
If unsure, say N.
config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
bool "Overlayfs: follow redirects even if redirects are turned off"
default y
depends on OVERLAY_FS
help
Disable this to get a possibly more secure configuration, but that
might not be backward compatible with previous kernels.
If backward compatibility is not an issue, then it is safe and
recommended to say N here.
For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say Y.
config OVERLAY_FS_INDEX
bool "Overlayfs: turn on inodes index feature by default"
depends on OVERLAY_FS
help
If this config option is enabled then overlay filesystems will use
the index directory to map lower inodes to upper inodes by default.
In this case it is still possible to turn off index globally with the
"index=off" module option or on a filesystem instance basis with the
"index=off" mount option.
The inodes index feature prevents breaking of lower hardlinks on copy
up.
Note, that the inodes index feature is not backward compatible.
That is, mounting an overlay which has an inodes index on a kernel
that doesn't support this feature will have unexpected results.
If unsure, say N.
config OVERLAY_FS_NFS_EXPORT
bool "Overlayfs: turn on NFS export feature by default"
depends on OVERLAY_FS
depends on OVERLAY_FS_INDEX
depends on !OVERLAY_FS_METACOPY
help
If this config option is enabled then overlay filesystems will use
the index directory to decode overlay NFS file handles by default.
In this case, it is still possible to turn off NFS export support
globally with the "nfs_export=off" module option or on a filesystem
instance basis with the "nfs_export=off" mount option.
The NFS export feature creates an index on copy up of every file and
directory. This full index is used to detect overlay filesystems
inconsistencies on lookup, like redirect from multiple upper dirs to
the same lower dir. The full index may incur some overhead on mount
time, especially when verifying that directory file handles are not
stale.
Note, that the NFS export feature is not backward compatible.
That is, mounting an overlay which has a full index on a kernel
that doesn't support this feature will have unexpected results.
Most users should say N here and enable this feature on a case-by-
case basis with the "nfs_export=on" mount option.
Say N unless you fully understand the consequences.
config OVERLAY_FS_XINO_AUTO
bool "Overlayfs: auto enable inode number mapping"
default n
depends on OVERLAY_FS
depends on 64BIT
help
If this config option is enabled then overlay filesystems will use
unused high bits in underlying filesystem inode numbers to map all
inodes to a unified address space. The mapped 64bit inode numbers
might not be compatible with applications that expect 32bit inodes.
If compatibility with applications that expect 32bit inodes is not an
issue, then it is safe and recommended to say Y here.
For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say N.
config OVERLAY_FS_METACOPY
bool "Overlayfs: turn on metadata only copy up feature by default"
depends on OVERLAY_FS
select OVERLAY_FS_REDIRECT_DIR
help
If this config option is enabled then overlay filesystems will
copy up only metadata where appropriate and data copy up will
happen when a file is opened for WRITE operation. It is still
possible to turn off this feature globally with the "metacopy=off"
module option or on a filesystem instance basis with the
"metacopy=off" mount option.
Note, that this feature is not backward compatible. That is,
mounting an overlay which has metacopy only inodes on a kernel
that doesn't support this feature will have unexpected results.
If unsure, say N.
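
Each of the defaults above can also be overridden per mount instance through the mount options named in the help texts ("redirect_dir=off", "index=off", "metacopy=off", "nfs_export=on", and so on). A minimal userspace sketch of that, assuming the standard overlayfs option names and the upstream "overlay" filesystem type string; whether this ksufs copy registers the same type name is an assumption:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Per-instance options override the Kconfig defaults above.
	 * "overlay" is the upstream type string; the name registered by
	 * this ksufs variant is an assumption, adjust to match. */
	const char *opts = "lowerdir=/lower,upperdir=/upper,workdir=/work,"
			   "redirect_dir=off,index=off,metacopy=off";

	if (mount("overlay", "/mnt/merged", "overlay", 0, opts) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}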


@@ -0,0 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the overlay filesystem.
#
obj-y += ksufs.o
ksufs-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
copy_up.o export.o

kernel/ksufs/5.10/copy_up.c (new file, 1010 lines)

File diff suppressed because it is too large.

kernel/ksufs/5.10/dir.c (new file, 1325 lines)

File diff suppressed because it is too large.

kernel/ksufs/5.10/export.c (new file, 870 lines)

@@ -0,0 +1,870 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Overlayfs NFS export support.
*
* Amir Goldstein <amir73il@gmail.com>
*
* Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
*/
#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
static int ksu_ovl_encode_maybe_copy_up(struct dentry *dentry)
{
int err;
if (ksu_ovl_dentry_upper(dentry))
return 0;
err = ksu_ovl_want_write(dentry);
if (!err) {
err = ksu_ovl_copy_up(dentry);
ksu_ovl_drop_write(dentry);
}
if (err) {
pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
}
return err;
}
/*
* Before encoding a non-upper directory file handle from real layer N, we need
* to check if it will be possible to reconnect an overlay dentry from the real
* lower decoded dentry. This is done by following the overlay ancestry up to a
* "layer N connected" ancestor and verifying that all parents along the way are
* "layer N connectable". If an ancestor that is NOT "layer N connectable" is
* found, we need to copy up an ancestor, which is "layer N connectable", thus
* making that ancestor "layer N connected". For example:
*
* layer 1: /a
* layer 2: /a/b/c
*
* The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
* copied up and renamed, upper dir /a will be indexed by lower dir /a from
* layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
* in ksu_ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
* dentry from the connected lower dentry /a/b/c.
*
* To avoid this problem on decode time, we need to copy up an ancestor of
* /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is
* /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
* and when the time comes to decode the file handle from lower dentry /a/b/c,
* ksu_ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
* a connected overlay dentry will be accomplished.
*
* (*) the algorithm in ksu_ovl_lookup_real_ancestor() can be improved to lookup an
* entry /a in the lower layers above layer N and find the indexed dir /a from
* layer 1. If that improvement is made, then the check for "layer N connected"
* will need to verify there are no redirects in lower layers above N. In the
* example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
* is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
*
* layer 1: /A (redirect = /a)
* layer 2: /a/b/c
*/
/* Return the lowest layer for encoding a connectable file handle */
static int ksu_ovl_connectable_layer(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = KSU_OVL_E(dentry);
/* We can get overlay root from root of any layer */
if (dentry == dentry->d_sb->s_root)
return oe->numlower;
/*
* If it's an unindexed merge dir, then it's not connectable with any
* lower layer
*/
if (ksu_ovl_dentry_upper(dentry) &&
!ksu_ovl_test_flag(KSU_OVL_INDEX, d_inode(dentry)))
return 0;
/* We can get upper/overlay path from indexed/lower dentry */
return oe->lowerstack[0].layer->idx;
}
/*
* @dentry is "connected" if all ancestors up to root or a "connected" ancestor
* have the same uppermost lower layer as the origin's layer. We may need to
* copy up a "connectable" ancestor to make it "connected". A "connected" dentry
* cannot become non "connected", so cache positive result in dentry flags.
*
* Return the connected origin layer or < 0 on error.
*/
static int ksu_ovl_connect_layer(struct dentry *dentry)
{
struct dentry *next, *parent = NULL;
int origin_layer;
int err = 0;
if (WARN_ON(dentry == dentry->d_sb->s_root) ||
WARN_ON(!ksu_ovl_dentry_lower(dentry)))
return -EIO;
origin_layer = KSU_OVL_E(dentry)->lowerstack[0].layer->idx;
if (ksu_ovl_dentry_test_flag(KSU_OVL_E_CONNECTED, dentry))
return origin_layer;
/* Find the topmost origin layer connectable ancestor of @dentry */
next = dget(dentry);
for (;;) {
parent = dget_parent(next);
if (WARN_ON(parent == next)) {
err = -EIO;
break;
}
/*
* If @parent is not origin layer connectable, then copy up
* @next which is origin layer connectable and we are done.
*/
if (ksu_ovl_connectable_layer(parent) < origin_layer) {
err = ksu_ovl_encode_maybe_copy_up(next);
break;
}
/* If @parent is connected or indexed we are done */
if (ksu_ovl_dentry_test_flag(KSU_OVL_E_CONNECTED, parent) ||
ksu_ovl_test_flag(KSU_OVL_INDEX, d_inode(parent)))
break;
dput(next);
next = parent;
}
dput(parent);
dput(next);
if (!err)
ksu_ovl_dentry_set_flag(KSU_OVL_E_CONNECTED, dentry);
return err ?: origin_layer;
}
/*
* We only need to encode origin if there is a chance that the same object was
* encoded pre copy up and then we need to stay consistent with the same
* encoding also after copy up. If non-pure upper is not indexed, then it was
* copied up before NFS export was enabled. In that case we don't need to worry
* about staying consistent with pre copy up encoding and we encode an upper
* file handle. Overlay root dentry is a private case of non-indexed upper.
*
* The following table summarizes the different file handle encodings used for
* different overlay object types:
*
* Object type | Encoding
* --------------------------------
* Pure upper | U
* Non-indexed upper | U
* Indexed upper | L (*)
* Non-upper | L (*)
*
* U = upper file handle
* L = lower file handle
*
* (*) Connecting an overlay dir from real lower dentry is not always
* possible when there are redirects in lower layers and non-indexed merge dirs.
* To mitigate those cases, we may copy up the lower dir ancestor before encoding
* a lower dir file handle.
*
* Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
static int ksu_ovl_check_encode_origin(struct dentry *dentry)
{
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
/* Upper file handle for pure upper */
if (!ksu_ovl_dentry_lower(dentry))
return 0;
/*
* Upper file handle for non-indexed upper.
*
* Root is never indexed, so if there's an upper layer, encode upper for
* root.
*/
if (ksu_ovl_dentry_upper(dentry) &&
!ksu_ovl_test_flag(KSU_OVL_INDEX, d_inode(dentry)))
return 0;
/*
* Decoding a merge dir, whose origin's ancestor is under a redirected
* lower dir or under a non-indexed upper is not always possible.
* ksu_ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
if (d_is_dir(dentry) && ksu_ovl_upper_mnt(ofs))
return ksu_ovl_connect_layer(dentry);
/* Lower file handle for indexed and non-upper dir/non-dir */
return 1;
}
static int ksu_ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
{
struct ksu_ovl_fh *fh = NULL;
int err, enc_lower;
int len;
/*
* Check if we should encode a lower or upper file handle and maybe
* copy up an ancestor to make lower file handle connectable.
*/
err = enc_lower = ksu_ovl_check_encode_origin(dentry);
if (enc_lower < 0)
goto fail;
/* Encode an upper or lower file handle */
fh = ksu_ovl_encode_real_fh(enc_lower ? ksu_ovl_dentry_lower(dentry) :
ksu_ovl_dentry_upper(dentry), !enc_lower);
if (IS_ERR(fh))
return PTR_ERR(fh);
len = KSU_OVL_FH_LEN(fh);
if (len <= buflen)
memcpy(fid, fh, len);
err = len;
out:
kfree(fh);
return err;
fail:
pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
dentry, err);
goto out;
}
static int ksu_ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct dentry *dentry;
int bytes, buflen = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
return FILEID_INVALID;
dentry = d_find_any_alias(inode);
if (!dentry)
return FILEID_INVALID;
bytes = ksu_ovl_dentry_to_fid(dentry, fid, buflen);
dput(dentry);
if (bytes <= 0)
return FILEID_INVALID;
*max_len = bytes >> 2;
if (bytes > buflen)
return FILEID_INVALID;
return KSU_OVL_FILEID_V1;
}
/*
* Find or instantiate an overlay dentry from real dentries and index.
*/
static struct dentry *ksu_ovl_obtain_alias(struct super_block *sb,
struct dentry *upper_alias,
struct ksu_ovl_path *lowerpath,
struct dentry *index)
{
struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
struct dentry *upper = upper_alias ?: index;
struct dentry *dentry;
struct inode *inode;
struct ksu_ovl_entry *oe;
struct ksu_ovl_inode_params oip = {
.lowerpath = lowerpath,
.index = index,
.numlower = !!lower
};
/* We get overlay directory dentries with ksu_ovl_lookup_real() */
if (d_is_dir(upper ?: lower))
return ERR_PTR(-EIO);
oip.upperdentry = dget(upper);
inode = ksu_ovl_get_inode(sb, &oip);
if (IS_ERR(inode)) {
dput(upper);
return ERR_CAST(inode);
}
if (upper)
ksu_ovl_set_flag(KSU_OVL_UPPERDATA, inode);
dentry = d_find_any_alias(inode);
if (dentry)
goto out_iput;
dentry = d_alloc_anon(inode->i_sb);
if (unlikely(!dentry))
goto nomem;
oe = ksu_ovl_alloc_entry(lower ? 1 : 0);
if (!oe)
goto nomem;
if (lower) {
oe->lowerstack->dentry = dget(lower);
oe->lowerstack->layer = lowerpath->layer;
}
dentry->d_fsdata = oe;
if (upper_alias)
ksu_ovl_dentry_set_upper_alias(dentry);
ksu_ovl_dentry_update_reval(dentry, upper,
DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
return d_instantiate_anon(dentry, inode);
nomem:
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
out_iput:
iput(inode);
return dentry;
}
/* Get the upper or lower dentry in stack that is on layer @idx */
static struct dentry *ksu_ovl_dentry_real_at(struct dentry *dentry, int idx)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
int i;
if (!idx)
return ksu_ovl_dentry_upper(dentry);
for (i = 0; i < oe->numlower; i++) {
if (oe->lowerstack[i].layer->idx == idx)
return oe->lowerstack[i].dentry;
}
return NULL;
}
/*
* Lookup a child overlay dentry to get a connected overlay dentry whose real
* dentry is @real. If @real is on upper layer, we lookup a child overlay
* dentry with the same name as the real dentry. Otherwise, we need to consult
* index for lookup.
*/
static struct dentry *ksu_ovl_lookup_real_one(struct dentry *connected,
struct dentry *real,
const struct ksu_ovl_layer *layer)
{
struct inode *dir = d_inode(connected);
struct dentry *this, *parent = NULL;
struct name_snapshot name;
int err;
/*
* Lookup child overlay dentry by real name. The dir mutex protects us
* from racing with overlay rename. If the overlay dentry that is above
* real has already been moved to a parent that is not under the
* connected overlay dir, we return -ECHILD and restart the lookup of
* connected real path from the top.
*/
inode_lock_nested(dir, I_MUTEX_PARENT);
err = -ECHILD;
parent = dget_parent(real);
if (ksu_ovl_dentry_real_at(connected, layer->idx) != parent)
goto fail;
/*
* We also need to take a snapshot of real dentry name to protect us
* from racing with underlying layer rename. In this case, we don't
* care about returning ESTALE, only from dereferencing a free name
* pointer because we hold no lock on the real dentry.
*/
take_dentry_name_snapshot(&name, real);
this = lookup_one_len(name.name.name, connected, name.name.len);
release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
} else if (!this || !this->d_inode) {
dput(this);
err = -ENOENT;
goto fail;
} else if (ksu_ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
err = -ESTALE;
goto fail;
}
out:
dput(parent);
inode_unlock(dir);
return this;
fail:
pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
this = ERR_PTR(err);
goto out;
}
static struct dentry *ksu_ovl_lookup_real(struct super_block *sb,
struct dentry *real,
const struct ksu_ovl_layer *layer);
/*
* Lookup an indexed or hashed overlay dentry by real inode.
*/
static struct dentry *ksu_ovl_lookup_real_inode(struct super_block *sb,
struct dentry *real,
const struct ksu_ovl_layer *layer)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
struct dentry *index = NULL;
struct dentry *this = NULL;
struct inode *inode;
/*
* Decoding upper dir from index is expensive, so first try to lookup
* overlay dentry in inode/dcache.
*/
inode = ksu_ovl_lookup_inode(sb, real, !layer->idx);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (inode) {
this = d_find_any_alias(inode);
iput(inode);
}
/*
* For decoded lower dir file handle, lookup index by origin to check
* if lower dir was copied up and/or removed.
*/
if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
index = ksu_ovl_lookup_index(ofs, NULL, real, false);
if (IS_ERR(index))
return index;
}
/* Get connected upper overlay dir from index */
if (index) {
struct dentry *upper = ksu_ovl_index_upper(ofs, index);
dput(index);
if (IS_ERR_OR_NULL(upper))
return upper;
/*
* ksu_ovl_lookup_real() in lower layer may call recursively once to
* ksu_ovl_lookup_real() in upper layer. The first level call walks
* back lower parents to the topmost indexed parent. The second
* recursive call walks back from indexed upper to the topmost
* connected/hashed upper parent (or up to root).
*/
this = ksu_ovl_lookup_real(sb, upper, &ofs->layers[0]);
dput(upper);
}
if (IS_ERR_OR_NULL(this))
return this;
if (ksu_ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
this = ERR_PTR(-EIO);
}
return this;
}
/*
* Lookup an indexed or hashed overlay dentry, whose real dentry is an
* ancestor of @real.
*/
static struct dentry *ksu_ovl_lookup_real_ancestor(struct super_block *sb,
struct dentry *real,
const struct ksu_ovl_layer *layer)
{
struct dentry *next, *parent = NULL;
struct dentry *ancestor = ERR_PTR(-EIO);
if (real == layer->mnt->mnt_root)
return dget(sb->s_root);
/* Find the topmost indexed or hashed ancestor */
next = dget(real);
for (;;) {
parent = dget_parent(next);
/*
* Lookup a matching overlay dentry in inode/dentry
* cache or in index by real inode.
*/
ancestor = ksu_ovl_lookup_real_inode(sb, next, layer);
if (ancestor)
break;
if (parent == layer->mnt->mnt_root) {
ancestor = dget(sb->s_root);
break;
}
/*
* If @real has been moved out of the layer root directory,
* we will eventually hit the real fs root. This cannot happen
* by legit overlay rename, so we return error in that case.
*/
if (parent == next) {
ancestor = ERR_PTR(-EXDEV);
break;
}
dput(next);
next = parent;
}
dput(parent);
dput(next);
return ancestor;
}
/*
* Lookup a connected overlay dentry whose real dentry is @real.
* If @real is on upper layer, we lookup a child overlay dentry with the same
* path as the real dentry. Otherwise, we need to consult index for lookup.
*/
static struct dentry *ksu_ovl_lookup_real(struct super_block *sb,
struct dentry *real,
const struct ksu_ovl_layer *layer)
{
struct dentry *connected;
int err = 0;
connected = ksu_ovl_lookup_real_ancestor(sb, real, layer);
if (IS_ERR(connected))
return connected;
while (!err) {
struct dentry *next, *this;
struct dentry *parent = NULL;
struct dentry *real_connected = ksu_ovl_dentry_real_at(connected,
layer->idx);
if (real_connected == real)
break;
/* Find the topmost dentry not yet connected */
next = dget(real);
for (;;) {
parent = dget_parent(next);
if (parent == real_connected)
break;
/*
* If real has been moved out of 'real_connected',
* we will not find 'real_connected' and hit the layer
* root. In that case, we need to restart connecting.
* This game can go on forever in the worst case. We
* may want to consider taking s_vfs_rename_mutex if
* this happens more than once.
*/
if (parent == layer->mnt->mnt_root) {
dput(connected);
connected = dget(sb->s_root);
break;
}
/*
* If real file has been moved out of the layer root
* directory, we will eventually hit the real fs root.
* This cannot happen by legit overlay rename, so we
* return error in that case.
*/
if (parent == next) {
err = -EXDEV;
break;
}
dput(next);
next = parent;
}
if (!err) {
this = ksu_ovl_lookup_real_one(connected, next, layer);
if (IS_ERR(this))
err = PTR_ERR(this);
/*
* Lookup of child in overlay can fail when racing with
* overlay rename of child away from 'connected' parent.
* In this case, we need to restart the lookup from the
* top, because we cannot trust that 'real_connected' is
* still an ancestor of 'real'. There is a good chance
* that the renamed overlay ancestor is now in cache, so
* ksu_ovl_lookup_real_ancestor() will find it and we can
* continue to connect exactly from where lookup failed.
*/
if (err == -ECHILD) {
this = ksu_ovl_lookup_real_ancestor(sb, real,
layer);
err = PTR_ERR_OR_ZERO(this);
}
if (!err) {
dput(connected);
connected = this;
}
}
dput(parent);
dput(next);
}
if (err)
goto fail;
return connected;
fail:
pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
dput(connected);
return ERR_PTR(err);
}
/*
* Get an overlay dentry from upper/lower real dentries and index.
*/
static struct dentry *ksu_ovl_get_dentry(struct super_block *sb,
struct dentry *upper,
struct ksu_ovl_path *lowerpath,
struct dentry *index)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
const struct ksu_ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
struct dentry *real = upper ?: (index ?: lowerpath->dentry);
/*
* Obtain a disconnected overlay dentry from a non-dir real dentry
* and index.
*/
if (!d_is_dir(real))
return ksu_ovl_obtain_alias(sb, upper, lowerpath, index);
/* Removed empty directory? */
if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
return ERR_PTR(-ENOENT);
/*
* If real dentry is connected and hashed, get a connected overlay
* dentry whose real dentry is @real.
*/
return ksu_ovl_lookup_real(sb, real, layer);
}
static struct dentry *ksu_ovl_upper_fh_to_d(struct super_block *sb,
struct ksu_ovl_fh *fh)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
struct dentry *dentry;
struct dentry *upper;
if (!ksu_ovl_upper_mnt(ofs))
return ERR_PTR(-EACCES);
upper = ksu_ovl_decode_real_fh(fh, ksu_ovl_upper_mnt(ofs), true);
if (IS_ERR_OR_NULL(upper))
return upper;
dentry = ksu_ovl_get_dentry(sb, upper, NULL, NULL);
dput(upper);
return dentry;
}
static struct dentry *ksu_ovl_lower_fh_to_d(struct super_block *sb,
struct ksu_ovl_fh *fh)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
struct ksu_ovl_path origin = { };
struct ksu_ovl_path *stack = &origin;
struct dentry *dentry = NULL;
struct dentry *index = NULL;
struct inode *inode;
int err;
/* First lookup overlay inode in inode cache by origin fh */
err = ksu_ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
if (err)
return ERR_PTR(err);
if (!d_is_dir(origin.dentry) ||
!(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
inode = ksu_ovl_lookup_inode(sb, origin.dentry, false);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_err;
if (inode) {
dentry = d_find_any_alias(inode);
iput(inode);
if (dentry)
goto out;
}
}
/* Then lookup indexed upper/whiteout by origin fh */
if (ofs->indexdir) {
index = ksu_ovl_get_index_fh(ofs, fh);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
goto out_err;
}
}
/* Then try to get a connected upper dir by index */
if (index && d_is_dir(index)) {
struct dentry *upper = ksu_ovl_index_upper(ofs, index);
err = PTR_ERR(upper);
if (IS_ERR_OR_NULL(upper))
goto out_err;
dentry = ksu_ovl_get_dentry(sb, upper, NULL, NULL);
dput(upper);
goto out;
}
/* Find origin.dentry again with ksu_ovl_acceptable() layer check */
if (d_is_dir(origin.dentry)) {
dput(origin.dentry);
origin.dentry = NULL;
err = ksu_ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
if (err)
goto out_err;
}
if (index) {
err = ksu_ovl_verify_origin(ofs, index, origin.dentry, false);
if (err)
goto out_err;
}
/* Get a connected non-upper dir or disconnected non-dir */
dentry = ksu_ovl_get_dentry(sb, NULL, &origin, index);
out:
dput(origin.dentry);
dput(index);
return dentry;
out_err:
dentry = ERR_PTR(err);
goto out;
}
static struct ksu_ovl_fh *ksu_ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
{
struct ksu_ovl_fh *fh;
/* If on-wire inner fid is aligned - nothing to do */
if (fh_type == KSU_OVL_FILEID_V1)
return (struct ksu_ovl_fh *)fid;
if (fh_type != KSU_OVL_FILEID_V0)
return ERR_PTR(-EINVAL);
if (buflen <= KSU_OVL_FH_WIRE_OFFSET)
return ERR_PTR(-EINVAL);
fh = kzalloc(buflen, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
/* Copy unaligned inner fh into aligned buffer */
memcpy(&fh->fb, fid, buflen - KSU_OVL_FH_WIRE_OFFSET);
return fh;
}
static struct dentry *ksu_ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
struct ksu_ovl_fh *fh = NULL;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
fh = ksu_ovl_fid_to_fh(fid, len, fh_type);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto out_err;
err = ksu_ovl_check_fh_len(fh, len);
if (err)
goto out_err;
flags = fh->fb.flags;
dentry = (flags & KSU_OVL_FH_FLAG_PATH_UPPER) ?
ksu_ovl_upper_fh_to_d(sb, fh) :
ksu_ovl_lower_fh_to_d(sb, fh);
err = PTR_ERR(dentry);
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
out:
/* We may have needed to re-align KSU_OVL_FILEID_V0 */
if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
kfree(fh);
return dentry;
out_err:
pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
fh_len, fh_type, flags, err);
dentry = ERR_PTR(err);
goto out;
}
static struct dentry *ksu_ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
return ERR_PTR(-EACCES);
}
static int ksu_ovl_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
/*
* ksu_ovl_fh_to_dentry() returns connected dir overlay dentries and
* ksu_ovl_fh_to_parent() is not implemented, so we should not get here.
*/
WARN_ON_ONCE(1);
return -EIO;
}
static struct dentry *ksu_ovl_get_parent(struct dentry *dentry)
{
/*
* ksu_ovl_fh_to_dentry() returns connected dir overlay dentries, so we
* should not get here.
*/
WARN_ON_ONCE(1);
return ERR_PTR(-EIO);
}
const struct export_operations ksu_ovl_export_operations = {
.encode_fh = ksu_ovl_encode_fh,
.fh_to_dentry = ksu_ovl_fh_to_dentry,
.fh_to_parent = ksu_ovl_fh_to_parent,
.get_name = ksu_ovl_get_name,
.get_parent = ksu_ovl_get_parent,
};
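
ksu_ovl_encode_fh() and ksu_ovl_fh_to_dentry() above are the two halves of the exportfs contract, and they can be exercised from userspace without an NFS server via the file-handle syscalls. A hedged sketch; the syscalls are real, but the expectation that handle_type comes back as KSU_OVL_FILEID_V1 is an inference from the code above, not a tested fact:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	struct file_handle *fh;
	int mount_id;

	if (argc < 3)
		return 1;	/* usage: prog <file> <any path on same mount> */

	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return 1;
	fh->handle_bytes = MAX_HANDLE_SZ;

	/* Kernel side: ksu_ovl_encode_fh() fills the fid and its length. */
	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) != 0) {
		perror("name_to_handle_at");
		return 1;
	}
	printf("type=%d bytes=%u\n", fh->handle_type, fh->handle_bytes);

	/* Kernel side: ksu_ovl_fh_to_dentry(). Needs CAP_DAC_READ_SEARCH
	 * and an fd on any object within the same overlay mount. */
	int mntfd = open(argv[2], O_RDONLY);
	int fd = open_by_handle_at(mntfd, fh, O_RDONLY);
	if (fd < 0)
		perror("open_by_handle_at");
	return 0;
}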

kernel/ksufs/5.10/file.c (new file, 809 lines)

@@ -0,0 +1,809 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017 Red Hat, Inc.
*/
#include <linux/cred.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include "overlayfs.h"
#define KSU_OVL_IOCB_MASK (IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
struct ksu_ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
struct fd fd;
};
static struct kmem_cache *ksu_ovl_aio_request_cachep;
static char ksu_ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ksu_ovl_inode_upper(inode))
return 'l';
if (ksu_ovl_has_upperdata(inode))
return 'u';
else
return 'm';
}
/* No atime modification nor notify on underlying */
#define KSU_OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
static struct file *ksu_ovl_open_realfile(const struct file *file,
struct inode *realinode)
{
struct inode *inode = file_inode(file);
struct file *realfile;
const struct cred *old_cred;
int flags = file->f_flags | KSU_OVL_OPEN_FLAGS;
int acc_mode = ACC_MODE(flags);
int err;
if (flags & O_APPEND)
acc_mode |= MAY_APPEND;
old_cred = ksu_ovl_override_creds(inode->i_sb);
err = inode_permission(realinode, MAY_OPEN | acc_mode);
if (err) {
realfile = ERR_PTR(err);
} else if (old_cred && !inode_owner_or_capable(realinode)) {
realfile = ERR_PTR(-EPERM);
} else {
realfile = open_with_fake_path(&file->f_path, flags, realinode,
current_cred());
}
ksu_ovl_revert_creds(inode->i_sb, old_cred);
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
file, file, ksu_ovl_whatisit(inode, realinode), file->f_flags,
realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
return realfile;
}
#define KSU_OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
static int ksu_ovl_change_flags(struct file *file, unsigned int flags)
{
struct inode *inode = file_inode(file);
int err;
flags |= KSU_OVL_OPEN_FLAGS;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~KSU_OVL_SETFL_MASK))
return -EIO;
flags &= KSU_OVL_SETFL_MASK;
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
return -EPERM;
if (flags & O_DIRECT) {
if (!file->f_mapping->a_ops ||
!file->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
if (file->f_op->check_flags) {
err = file->f_op->check_flags(flags);
if (err)
return err;
}
spin_lock(&file->f_lock);
file->f_flags = (file->f_flags & ~KSU_OVL_SETFL_MASK) | flags;
spin_unlock(&file->f_lock);
return 0;
}
static int ksu_ovl_real_fdget_meta(const struct file *file, struct fd *real,
bool allow_meta)
{
struct inode *inode = file_inode(file);
struct inode *realinode;
real->flags = 0;
real->file = file->private_data;
if (allow_meta)
realinode = ksu_ovl_inode_real(inode);
else
realinode = ksu_ovl_inode_realdata(inode);
/* Has it been copied up since we'd opened it? */
if (unlikely(file_inode(real->file) != realinode)) {
real->flags = FDPUT_FPUT;
real->file = ksu_ovl_open_realfile(file, realinode);
return PTR_ERR_OR_ZERO(real->file);
}
/* Did the flags change since open? */
if (unlikely((file->f_flags ^ real->file->f_flags) & ~KSU_OVL_OPEN_FLAGS))
return ksu_ovl_change_flags(real->file, file->f_flags);
return 0;
}
static int ksu_ovl_real_fdget(const struct file *file, struct fd *real)
{
if (d_is_dir(file_dentry(file))) {
real->flags = 0;
real->file = ksu_ovl_dir_real_file(file, false);
return PTR_ERR_OR_ZERO(real->file);
}
return ksu_ovl_real_fdget_meta(file, real, false);
}
static int ksu_ovl_open(struct inode *inode, struct file *file)
{
struct file *realfile;
int err;
err = ksu_ovl_maybe_copy_up(file_dentry(file), file->f_flags);
if (err)
return err;
/* No longer need these flags, so don't pass them on to underlying fs */
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
realfile = ksu_ovl_open_realfile(file, ksu_ovl_inode_realdata(inode));
if (IS_ERR(realfile))
return PTR_ERR(realfile);
file->private_data = realfile;
return 0;
}
static int ksu_ovl_release(struct inode *inode, struct file *file)
{
fput(file->private_data);
return 0;
}
static loff_t ksu_ovl_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
loff_t ret;
/*
* The two special cases below do not need to involve real fs,
* so we can optimize for concurrent callers.
*/
if (offset == 0) {
if (whence == SEEK_CUR)
return file->f_pos;
if (whence == SEEK_SET)
return vfs_setpos(file, 0, 0);
}
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
return ret;
/*
* Overlay file f_pos is the master copy that is preserved
* through copy up and modified on read/write, but only real
* fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
* limitations that are more strict than ->s_maxbytes for specific
* files, so we use the real file to perform seeks.
*/
ksu_ovl_inode_lock(inode);
real.file->f_pos = file->f_pos;
old_cred = ksu_ovl_override_creds(inode->i_sb);
ret = vfs_llseek(real.file, offset, whence);
ksu_ovl_revert_creds(inode->i_sb, old_cred);
file->f_pos = real.file->f_pos;
ksu_ovl_inode_unlock(inode);
fdput(real);
return ret;
}
static void ksu_ovl_file_accessed(struct file *file)
{
struct inode *inode, *upperinode;
if (file->f_flags & O_NOATIME)
return;
inode = file_inode(file);
upperinode = ksu_ovl_inode_upper(inode);
if (!upperinode)
return;
if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
!timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
inode->i_mtime = upperinode->i_mtime;
inode->i_ctime = upperinode->i_ctime;
}
touch_atime(&file->f_path);
}
static inline void ksu_ovl_aio_put(struct ksu_ovl_aio_req *aio_req)
{
if (refcount_dec_and_test(&aio_req->ref)) {
fdput(aio_req->fd);
kmem_cache_free(ksu_ovl_aio_request_cachep, aio_req);
}
}
static void ksu_ovl_aio_cleanup_handler(struct ksu_ovl_aio_req *aio_req)
{
struct kiocb *iocb = &aio_req->iocb;
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
/* Actually acquired in ksu_ovl_write_iter() */
__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
SB_FREEZE_WRITE);
file_end_write(iocb->ki_filp);
ksu_ovl_copyattr(ksu_ovl_inode_real(inode), inode);
}
orig_iocb->ki_pos = iocb->ki_pos;
ksu_ovl_aio_put(aio_req);
}
static void ksu_ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
{
struct ksu_ovl_aio_req *aio_req = container_of(iocb,
struct ksu_ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
ksu_ovl_aio_cleanup_handler(aio_req);
orig_iocb->ki_complete(orig_iocb, res, res2);
}
static ssize_t ksu_ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct fd real;
const struct cred *old_cred;
ssize_t ret;
if (!iov_iter_count(iter))
return 0;
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
return ret;
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
(!real.file->f_mapping->a_ops ||
!real.file->f_mapping->a_ops->direct_IO))
goto out_fdput;
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
iocb_to_rw_flags(iocb->ki_flags,
KSU_OVL_IOCB_MASK));
} else {
struct ksu_ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ksu_ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ksu_ovl_aio_rw_complete;
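/* Two refs: the submitter drops one right after vfs_iocb_iter_read()
 * returns; ksu_ovl_aio_cleanup_handler() drops the other, either directly
 * below on synchronous completion or from ksu_ovl_aio_rw_complete(). */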
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
ksu_ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ksu_ovl_aio_cleanup_handler(aio_req);
}
out:
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
ksu_ovl_file_accessed(file);
out_fdput:
fdput(real);
return ret;
}
static ssize_t ksu_ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
ssize_t ret;
int ifl = iocb->ki_flags;
if (!iov_iter_count(iter))
return 0;
inode_lock(inode);
/* Update mode */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode), inode);
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
goto out_unlock;
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
(!real.file->f_mapping->a_ops ||
!real.file->f_mapping->a_ops->direct_IO))
goto out_fdput;
if (!ksu_ovl_should_sync(KSU_OVL_FS(inode->i_sb)))
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
iocb_to_rw_flags(ifl, KSU_OVL_IOCB_MASK));
file_end_write(real.file);
/* Update size */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode), inode);
} else {
struct ksu_ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ksu_ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
file_start_write(real.file);
/* Pacify lockdep, same trick as done in aio_write() */
__sb_writers_release(file_inode(real.file)->i_sb,
SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ksu_ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ksu_ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ksu_ovl_aio_cleanup_handler(aio_req);
}
out:
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
out_fdput:
fdput(real);
out_unlock:
inode_unlock(inode);
return ret;
}
/*
* Calling iter_file_splice_write() directly from overlay's f_op may deadlock
* due to lock order inversion between pipe->mutex in iter_file_splice_write()
* and file_start_write(real.file) in ksu_ovl_write_iter().
*
* So do everything ksu_ovl_write_iter() does and call iter_file_splice_write() on
* the real file.
*/
static ssize_t ksu_ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
struct fd real;
const struct cred *old_cred;
struct inode *inode = file_inode(out);
struct inode *realinode = ksu_ovl_inode_real(inode);
ssize_t ret;
inode_lock(inode);
/* Update mode */
ksu_ovl_copyattr(realinode, inode);
ret = file_remove_privs(out);
if (ret)
goto out_unlock;
ret = ksu_ovl_real_fdget(out, &real);
if (ret)
goto out_unlock;
old_cred = ksu_ovl_override_creds(inode->i_sb);
file_start_write(real.file);
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
file_end_write(real.file);
/* Update size */
ksu_ovl_copyattr(realinode, inode);
ksu_ovl_revert_creds(inode->i_sb, old_cred);
fdput(real);
out_unlock:
inode_unlock(inode);
return ret;
}
static int ksu_ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
const struct cred *old_cred;
int ret;
ret = ksu_ovl_sync_status(KSU_OVL_FS(file_inode(file)->i_sb));
if (ret <= 0)
return ret;
ret = ksu_ovl_real_fdget_meta(file, &real, !datasync);
if (ret)
return ret;
/* Don't sync lower file for fear of receiving EROFS error */
if (file_inode(real.file) == ksu_ovl_inode_upper(file_inode(file))) {
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_fsync_range(real.file, start, end, datasync);
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
}
fdput(real);
return ret;
}
static int ksu_ovl_mmap(struct file *file, struct vm_area_struct *vma)
{
struct file *realfile = file->private_data;
const struct cred *old_cred;
int ret;
if (!realfile->f_op->mmap)
return -ENODEV;
if (WARN_ON(file != vma->vm_file))
return -EIO;
vma->vm_file = get_file(realfile);
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
ret = call_mmap(vma->vm_file, vma);
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
if (ret) {
/* Drop reference count from new vm_file value */
fput(realfile);
} else {
/* Drop reference count from previous vm_file value */
fput(file);
}
ksu_ovl_file_accessed(file);
return ret;
}
static long ksu_ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
int ret;
inode_lock(inode);
/* Update mode */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode), inode);
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
goto out_unlock;
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_fallocate(real.file, mode, offset, len);
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
/* Update size */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode), inode);
fdput(real);
out_unlock:
inode_unlock(inode);
return ret;
}
static int ksu_ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
struct fd real;
const struct cred *old_cred;
int ret;
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
return ret;
old_cred = ksu_ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_fadvise(real.file, offset, len, advice);
ksu_ovl_revert_creds(file_inode(file)->i_sb, old_cred);
fdput(real);
return ret;
}
static long ksu_ovl_real_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fd real;
long ret;
ret = ksu_ovl_real_fdget(file, &real);
if (ret)
return ret;
ret = security_file_ioctl(real.file, cmd, arg);
if (!ret) {
/*
* Don't override creds, since we currently can't safely check
* permissions before doing so.
*/
ret = vfs_ioctl(real.file, cmd, arg);
}
fdput(real);
return ret;
}
static long ksu_ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
/*
* Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
* capability.
*/
ret = -EPERM;
if (!ksu_ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
!capable(CAP_LINUX_IMMUTABLE))
goto unlock;
ret = ksu_ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
if (ret)
goto unlock;
ret = ksu_ovl_real_ioctl(file, cmd, arg);
ksu_ovl_copyflags(ksu_ovl_inode_real(inode), inode);
unlock:
inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
long ksu_ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
switch (cmd) {
case FS_IOC_GETFLAGS:
case FS_IOC_FSGETXATTR:
ret = ksu_ovl_real_ioctl(file, cmd, arg);
break;
case FS_IOC_FSSETXATTR:
case FS_IOC_SETFLAGS:
ret = ksu_ovl_ioctl_set_flags(file, cmd, arg);
break;
default:
ret = -ENOTTY;
}
return ret;
}
#ifdef CONFIG_COMPAT
long ksu_ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case FS_IOC32_GETFLAGS:
cmd = FS_IOC_GETFLAGS;
break;
case FS_IOC32_SETFLAGS:
cmd = FS_IOC_SETFLAGS;
break;
default:
return -ENOIOCTLCMD;
}
return ksu_ovl_ioctl(file, cmd, arg);
}
#endif
enum ksu_ovl_copyop {
KSU_OVL_COPY,
KSU_OVL_CLONE,
KSU_OVL_DEDUPE,
};
static loff_t ksu_ovl_copyfile(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int flags, enum ksu_ovl_copyop op)
{
struct inode *inode_out = file_inode(file_out);
struct fd real_in, real_out;
const struct cred *old_cred;
loff_t ret;
inode_lock(inode_out);
if (op != KSU_OVL_DEDUPE) {
/* Update mode */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode_out), inode_out);
ret = file_remove_privs(file_out);
if (ret)
goto out_unlock;
}
ret = ksu_ovl_real_fdget(file_out, &real_out);
if (ret)
goto out_unlock;
ret = ksu_ovl_real_fdget(file_in, &real_in);
if (ret) {
fdput(real_out);
goto out_unlock;
}
old_cred = ksu_ovl_override_creds(file_inode(file_out)->i_sb);
switch (op) {
case KSU_OVL_COPY:
ret = vfs_copy_file_range(real_in.file, pos_in,
real_out.file, pos_out, len, flags);
break;
case KSU_OVL_CLONE:
ret = vfs_clone_file_range(real_in.file, pos_in,
real_out.file, pos_out, len, flags);
break;
case KSU_OVL_DEDUPE:
ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
real_out.file, pos_out, len,
flags);
break;
}
ksu_ovl_revert_creds(file_inode(file_out)->i_sb, old_cred);
/* Update size */
ksu_ovl_copyattr(ksu_ovl_inode_real(inode_out), inode_out);
fdput(real_in);
fdput(real_out);
out_unlock:
inode_unlock(inode_out);
return ret;
}
static ssize_t ksu_ovl_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
return ksu_ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
KSU_OVL_COPY);
}
static loff_t ksu_ovl_remap_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags)
{
enum ksu_ovl_copyop op;
if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
if (remap_flags & REMAP_FILE_DEDUP)
op = KSU_OVL_DEDUPE;
else
op = KSU_OVL_CLONE;
/*
* Don't copy up because of a dedupe request, this wouldn't make sense
* most of the time (data would be duplicated instead of deduplicated).
*/
if (op == KSU_OVL_DEDUPE &&
(!ksu_ovl_inode_upper(file_inode(file_in)) ||
!ksu_ovl_inode_upper(file_inode(file_out))))
return -EPERM;
return ksu_ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
remap_flags, op);
}
const struct file_operations ksu_ovl_file_operations = {
.open = ksu_ovl_open,
.release = ksu_ovl_release,
.llseek = ksu_ovl_llseek,
.read_iter = ksu_ovl_read_iter,
.write_iter = ksu_ovl_write_iter,
.fsync = ksu_ovl_fsync,
.mmap = ksu_ovl_mmap,
.fallocate = ksu_ovl_fallocate,
.fadvise = ksu_ovl_fadvise,
.unlocked_ioctl = ksu_ovl_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ksu_ovl_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
.splice_write = ksu_ovl_splice_write,
.copy_file_range = ksu_ovl_copy_file_range,
.remap_file_range = ksu_ovl_remap_file_range,
};
int __init ksu_ovl_aio_request_cache_init(void)
{
ksu_ovl_aio_request_cachep = kmem_cache_create("ksu_ovl_aio_req",
sizeof(struct ksu_ovl_aio_req),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!ksu_ovl_aio_request_cachep)
return -ENOMEM;
return 0;
}
void ksu_ovl_aio_request_cache_destroy(void)
{
kmem_cache_destroy(ksu_ovl_aio_request_cachep);
}
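
The FS_IOC_GETFLAGS/FS_IOC_SETFLAGS path above (ksu_ovl_ioctl() and ksu_ovl_ioctl_set_flags()) is what chattr/lsattr-style tools hit on an overlay file; setting a flag may first trigger copy-up. A hedged userspace sketch using the real ioctl interface (note that FS_APPEND_FL, like FS_IMMUTABLE_FL, additionally requires CAP_LINUX_IMMUTABLE):

#include <fcntl.h>
#include <linux/fs.h>	/* FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_APPEND_FL */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	long flags;
	int fd = open(argv[1], O_RDONLY);

	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) != 0) {
		perror("FS_IOC_GETFLAGS");	/* served by ksu_ovl_real_ioctl() */
		return 1;
	}

	/* FS_IOC_SETFLAGS goes through ksu_ovl_ioctl_set_flags(), which may
	 * copy the file up before forwarding the ioctl to the upper layer. */
	flags |= FS_APPEND_FL;
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) != 0)
		perror("FS_IOC_SETFLAGS");

	close(fd);
	return 0;
}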

kernel/ksufs/5.10/inode.c (new file, 1049 lines)

File diff suppressed because it is too large.


@@ -0,0 +1,145 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
*/
struct ksu_ovl_config {
char *lowerdir;
char *upperdir;
char *workdir;
bool default_permissions;
bool redirect_dir;
bool redirect_follow;
const char *redirect_mode;
bool index;
bool nfs_export;
int xino;
bool metacopy;
bool ksu_ovl_volatile;
bool override_creds;
};
struct ksu_ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
/* Unusable (conflicting) uuid */
bool bad_uuid;
/* Used as a lower layer (but maybe also as upper) */
bool is_lower;
};
struct ksu_ovl_layer {
struct vfsmount *mnt;
/* Trap in ovl inode cache */
struct inode *trap;
struct ksu_ovl_sb *fs;
/* Index of this layer in fs root (upper idx == 0) */
int idx;
/* One fsid per unique underlying sb (upper fsid == 0) */
int fsid;
};
struct ksu_ovl_path {
const struct ksu_ovl_layer *layer;
struct dentry *dentry;
};
/* private information held for overlayfs's superblock */
struct ksu_ovl_fs {
unsigned int numlayer;
/* Number of unique fs among layers including upper fs */
unsigned int numfs;
const struct ksu_ovl_layer *layers;
struct ksu_ovl_sb *fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
struct dentry *workdir;
/* index directory listing overlay inodes by origin file handle */
struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ksu_ovl_config config;
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
bool tmpfile;
bool noxattr;
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
bool share_whiteout;
/* Traps in ovl inode cache */
struct inode *workbasedir_trap;
struct inode *workdir_trap;
struct inode *indexdir_trap;
/* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
int xino_mode;
/* For allocation of non-persistent inode numbers */
atomic_long_t last_ino;
/* Whiteout dentry cache */
struct dentry *whiteout;
/* r/o snapshot of upperdir sb's errseq, only taken on volatile mounts */
errseq_t errseq;
};
static inline struct vfsmount *ksu_ovl_upper_mnt(struct ksu_ovl_fs *ofs)
{
return ofs->layers[0].mnt;
}
static inline struct ksu_ovl_fs *KSU_OVL_FS(struct super_block *sb)
{
return (struct ksu_ovl_fs *)sb->s_fs_info;
}
static inline bool ksu_ovl_should_sync(struct ksu_ovl_fs *ofs)
{
return !ofs->config.ksu_ovl_volatile;
}
/* private information held for every overlayfs dentry */
struct ksu_ovl_entry {
union {
struct {
unsigned long flags;
};
struct rcu_head rcu;
};
unsigned numlower;
struct ksu_ovl_path lowerstack[];
};
struct ksu_ovl_entry *ksu_ovl_alloc_entry(unsigned int numlower);
static inline struct ksu_ovl_entry *KSU_OVL_E(struct dentry *dentry)
{
return (struct ksu_ovl_entry *) dentry->d_fsdata;
}
struct ksu_ovl_inode {
union {
struct ksu_ovl_dir_cache *cache; /* directory */
struct inode *lowerdata; /* regular file */
};
const char *redirect;
u64 version;
unsigned long flags;
struct inode vfs_inode;
struct dentry *__upperdentry;
struct inode *lower;
/* synchronize copy up and more */
struct mutex lock;
};
static inline struct ksu_ovl_inode *KSU_OVL_I(struct inode *inode)
{
return container_of(inode, struct ksu_ovl_inode, vfs_inode);
}
static inline struct dentry *ksu_ovl_upperdentry_dereference(struct ksu_ovl_inode *oi)
{
return READ_ONCE(oi->__upperdentry);
}
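
KSU_OVL_I() relies on struct inode being embedded inside struct ksu_ovl_inode, so the VFS can hand back a plain struct inode * and the overlay recovers its private state with container_of(). A toy, self-contained illustration of that embedding pattern (the names here are invented stand-ins, not part of ksufs):

#include <stddef.h>
#include <stdio.h>

struct inode { int i_ino; };			/* toy stand-in */
struct toy_ovl_inode {
	unsigned long flags;
	struct inode vfs_inode;			/* embedded, as in ksu_ovl_inode */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct toy_ovl_inode *TOY_OVL_I(struct inode *inode)
{
	return container_of(inode, struct toy_ovl_inode, vfs_inode);
}

int main(void)
{
	struct toy_ovl_inode oi = { .flags = 42, .vfs_inode = { .i_ino = 7 } };
	struct inode *vfs = &oi.vfs_inode;	/* what the VFS passes around */
	printf("flags=%lu\n", TOY_OVL_I(vfs)->flags);	/* prints 42 */
	return 0;
}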

kernel/ksufs/5.10/namei.c (new file, 1184 lines)

File diff suppressed because it is too large.


@@ -0,0 +1,551 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
* Copyright (C) 2011 Novell Inc.
*/
#include <linux/kernel.h>
#include <linux/uuid.h>
#include <linux/fs.h>
#include "ksu_ovl_entry.h"
#undef pr_fmt
#define pr_fmt(fmt) "ksufs: " fmt
enum ksu_ovl_path_type {
__KSU_OVL_PATH_UPPER = (1 << 0),
__KSU_OVL_PATH_MERGE = (1 << 1),
__KSU_OVL_PATH_ORIGIN = (1 << 2),
};
#define KSU_OVL_TYPE_UPPER(type) ((type) & __KSU_OVL_PATH_UPPER)
#define KSU_OVL_TYPE_MERGE(type) ((type) & __KSU_OVL_PATH_MERGE)
#define KSU_OVL_TYPE_ORIGIN(type) ((type) & __KSU_OVL_PATH_ORIGIN)
#define KSU_OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
enum ksu_ovl_xattr {
KSU_OVL_XATTR_OPAQUE,
KSU_OVL_XATTR_REDIRECT,
KSU_OVL_XATTR_ORIGIN,
KSU_OVL_XATTR_IMPURE,
KSU_OVL_XATTR_NLINK,
KSU_OVL_XATTR_UPPER,
KSU_OVL_XATTR_METACOPY,
};
enum ksu_ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
KSU_OVL_IMPURE,
/* Non-merge dir that may contain whiteout entries */
KSU_OVL_WHITEOUTS,
KSU_OVL_INDEX,
KSU_OVL_UPPERDATA,
/* Inode number will remain constant over copy up. */
KSU_OVL_CONST_INO,
};
enum ksu_ovl_entry_flag {
KSU_OVL_E_UPPER_ALIAS,
KSU_OVL_E_OPAQUE,
KSU_OVL_E_CONNECTED,
};
enum {
KSU_OVL_XINO_OFF,
KSU_OVL_XINO_AUTO,
KSU_OVL_XINO_ON,
};
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
* origin.fh - exported file handle of the lower file
* origin.uuid - uuid of the lower filesystem
*/
#define KSU_OVL_FH_VERSION 0
#define KSU_OVL_FH_MAGIC 0xfb
/* CPU byte order required for fid decoding: */
#define KSU_OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
#define KSU_OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
/* Is the real inode encoded in fid an upper inode? */
#define KSU_OVL_FH_FLAG_PATH_UPPER (1 << 2)
#define KSU_OVL_FH_FLAG_ALL (KSU_OVL_FH_FLAG_BIG_ENDIAN | KSU_OVL_FH_FLAG_ANY_ENDIAN | \
KSU_OVL_FH_FLAG_PATH_UPPER)
#if defined(__LITTLE_ENDIAN)
#define KSU_OVL_FH_FLAG_CPU_ENDIAN 0
#elif defined(__BIG_ENDIAN)
#define KSU_OVL_FH_FLAG_CPU_ENDIAN KSU_OVL_FH_FLAG_BIG_ENDIAN
#else
#error Endianness not defined
#endif
/* The type used to be returned by overlay exportfs for misaligned fid */
#define KSU_OVL_FILEID_V0 0xfb
/* The type returned by overlay exportfs for 32bit aligned fid */
#define KSU_OVL_FILEID_V1 0xf8
/* On-disk format for "origin" file handle */
struct ksu_ovl_fb {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* KSU_OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
u32 fid[]; /* file identifier should be 32bit aligned in-memory */
} __packed;
/* In-memory and on-wire format for overlay file handle */
struct ksu_ovl_fh {
u8 padding[3]; /* make sure fb.fid is 32bit aligned */
union {
struct ksu_ovl_fb fb;
u8 buf[0];
};
} __packed;
#define KSU_OVL_FH_WIRE_OFFSET offsetof(struct ksu_ovl_fh, fb)
#define KSU_OVL_FH_LEN(fh) (KSU_OVL_FH_WIRE_OFFSET + (fh)->fb.len)
#define KSU_OVL_FH_FID_OFFSET (KSU_OVL_FH_WIRE_OFFSET + \
offsetof(struct ksu_ovl_fb, fid))
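/*
 * Hedged standalone sketch (userspace C11, not part of this header): with a
 * 16-byte stand-in for uuid_t, the 3 padding bytes land the packed fb.fid
 * member on a 32-bit boundary, which is the invariant behind
 * KSU_OVL_FH_WIRE_OFFSET and KSU_OVL_FH_FID_OFFSET.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
struct toy_fb {
	uint8_t version, magic, len, flags, type;
	uint8_t uuid[16];	/* stand-in for uuid_t */
	uint32_t fid[];
} __attribute__((packed));
struct toy_fh {
	uint8_t padding[3];
	struct toy_fb fb;
} __attribute__((packed));
static_assert(offsetof(struct toy_fh, fb) == 3, "wire offset");
static_assert((offsetof(struct toy_fh, fb) + offsetof(struct toy_fb, fid)) % 4 == 0,
	      "fid is 32-bit aligned in-memory");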
extern const char *ksu_ovl_xattr_table[];
static inline const char *ksu_ovl_xattr(struct ksu_ovl_fs *ofs, enum ksu_ovl_xattr ox)
{
return ksu_ovl_xattr_table[ox];
}
static inline int ksu_ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
}
static inline int ksu_ovl_do_unlink(struct inode *dir, struct dentry *dentry)
{
int err = vfs_unlink(dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
}
static inline int ksu_ovl_do_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
{
int err = vfs_link(old_dentry, dir, new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
}
static inline int ksu_ovl_do_create(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
int err = vfs_create(dir, dentry, mode, true);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
static inline int ksu_ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
int err = vfs_mkdir(dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
static inline int ksu_ovl_do_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
int err = vfs_mknod(dir, dentry, mode, dev);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
}
static inline int ksu_ovl_do_symlink(struct inode *dir, struct dentry *dentry,
const char *oldname)
{
int err = vfs_symlink(dir, dentry, oldname);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
}
static inline ssize_t ksu_ovl_do_getxattr(struct ksu_ovl_fs *ofs, struct dentry *dentry,
enum ksu_ovl_xattr ox, void *value,
size_t size)
{
const char *name = ksu_ovl_xattr(ofs, ox);
struct inode *ip = d_inode(dentry);
return __vfs_getxattr(dentry, ip, name, value, size, XATTR_NOSECURITY);
}
static inline int ksu_ovl_do_setxattr(struct ksu_ovl_fs *ofs, struct dentry *dentry,
enum ksu_ovl_xattr ox, const void *value,
size_t size)
{
const char *name = ksu_ovl_xattr(ofs, ox);
int err = vfs_setxattr(dentry, name, value, size, 0);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
dentry, name, min((int)size, 48), value, size, err);
return err;
}
static inline int ksu_ovl_do_removexattr(struct ksu_ovl_fs *ofs, struct dentry *dentry,
enum ksu_ovl_xattr ox)
{
const char *name = ksu_ovl_xattr(ofs, ox);
int err = vfs_removexattr(dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
static inline int ksu_ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
struct inode *newdir, struct dentry *newdentry,
unsigned int flags)
{
int err;
pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
if (err) {
pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
olddentry, newdentry, err);
}
return err;
}
static inline int ksu_ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
int err = vfs_whiteout(dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
static inline struct dentry *ksu_ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
{
struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
int err = PTR_ERR_OR_ZERO(ret);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
return ret;
}
static inline bool ksu_ovl_open_flags_need_copy_up(int flags)
{
if (!flags)
return false;
return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
}
/* util.c */
int ksu_ovl_want_write(struct dentry *dentry);
void ksu_ovl_drop_write(struct dentry *dentry);
struct dentry *ksu_ovl_workdir(struct dentry *dentry);
const struct cred *ksu_ovl_override_creds(struct super_block *sb);
void ksu_ovl_revert_creds(struct super_block *sb, const struct cred *oldcred);
int ksu_ovl_can_decode_fh(struct super_block *sb);
struct dentry *ksu_ovl_indexdir(struct super_block *sb);
bool ksu_ovl_index_all(struct super_block *sb);
bool ksu_ovl_verify_lower(struct super_block *sb);
struct ksu_ovl_entry *ksu_ovl_alloc_entry(unsigned int numlower);
bool ksu_ovl_dentry_remote(struct dentry *dentry);
void ksu_ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
unsigned int mask);
bool ksu_ovl_dentry_weird(struct dentry *dentry);
enum ksu_ovl_path_type ksu_ovl_path_type(struct dentry *dentry);
void ksu_ovl_path_upper(struct dentry *dentry, struct path *path);
void ksu_ovl_path_lower(struct dentry *dentry, struct path *path);
void ksu_ovl_path_lowerdata(struct dentry *dentry, struct path *path);
enum ksu_ovl_path_type ksu_ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ksu_ovl_dentry_upper(struct dentry *dentry);
struct dentry *ksu_ovl_dentry_lower(struct dentry *dentry);
struct dentry *ksu_ovl_dentry_lowerdata(struct dentry *dentry);
const struct ksu_ovl_layer *ksu_ovl_layer_lower(struct dentry *dentry);
struct dentry *ksu_ovl_dentry_real(struct dentry *dentry);
struct dentry *ksu_ovl_i_dentry_upper(struct inode *inode);
struct inode *ksu_ovl_inode_upper(struct inode *inode);
struct inode *ksu_ovl_inode_lower(struct inode *inode);
struct inode *ksu_ovl_inode_lowerdata(struct inode *inode);
struct inode *ksu_ovl_inode_real(struct inode *inode);
struct inode *ksu_ovl_inode_realdata(struct inode *inode);
struct ksu_ovl_dir_cache *ksu_ovl_dir_cache(struct inode *inode);
void ksu_ovl_set_dir_cache(struct inode *inode, struct ksu_ovl_dir_cache *cache);
void ksu_ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
void ksu_ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
bool ksu_ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
bool ksu_ovl_dentry_is_opaque(struct dentry *dentry);
bool ksu_ovl_dentry_is_whiteout(struct dentry *dentry);
void ksu_ovl_dentry_set_opaque(struct dentry *dentry);
bool ksu_ovl_dentry_has_upper_alias(struct dentry *dentry);
void ksu_ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ksu_ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
bool ksu_ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
bool ksu_ovl_has_upperdata(struct inode *inode);
void ksu_ovl_set_upperdata(struct inode *inode);
bool ksu_ovl_redirect_dir(struct super_block *sb);
const char *ksu_ovl_dentry_get_redirect(struct dentry *dentry);
void ksu_ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ksu_ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ksu_ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ksu_ovl_dentry_version_get(struct dentry *dentry);
bool ksu_ovl_is_whiteout(struct dentry *dentry);
struct file *ksu_ovl_path_open(struct path *path, int flags);
int ksu_ovl_copy_up_start(struct dentry *dentry, int flags);
void ksu_ovl_copy_up_end(struct dentry *dentry);
bool ksu_ovl_already_copied_up(struct dentry *dentry, int flags);
bool ksu_ovl_check_origin_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry);
bool ksu_ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
enum ksu_ovl_xattr ox);
int ksu_ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
enum ksu_ovl_xattr ox, const void *value, size_t size,
int xerr);
int ksu_ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
bool ksu_ovl_inuse_trylock(struct dentry *dentry);
void ksu_ovl_inuse_unlock(struct dentry *dentry);
bool ksu_ovl_is_inuse(struct dentry *dentry);
bool ksu_ovl_need_index(struct dentry *dentry);
int ksu_ovl_nlink_start(struct dentry *dentry);
void ksu_ovl_nlink_end(struct dentry *dentry);
int ksu_ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
int ksu_ovl_check_metacopy_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry);
bool ksu_ovl_is_metacopy_dentry(struct dentry *dentry);
char *ksu_ovl_get_redirect_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry,
int padding);
int ksu_ovl_sync_status(struct ksu_ovl_fs *ofs);
static inline void ksu_ovl_set_flag(unsigned long flag, struct inode *inode)
{
set_bit(flag, &KSU_OVL_I(inode)->flags);
}
static inline void ksu_ovl_clear_flag(unsigned long flag, struct inode *inode)
{
clear_bit(flag, &KSU_OVL_I(inode)->flags);
}
static inline bool ksu_ovl_test_flag(unsigned long flag, struct inode *inode)
{
return test_bit(flag, &KSU_OVL_I(inode)->flags);
}
static inline bool ksu_ovl_is_impuredir(struct super_block *sb,
struct dentry *dentry)
{
return ksu_ovl_check_dir_xattr(sb, dentry, KSU_OVL_XATTR_IMPURE);
}
/*
* With xino=auto, we make a best effort to keep all inodes on the same
* st_dev and d_ino consistent with st_ino.
* With xino=on, we make the same effort but warn if we fail.
*/
static inline bool ksu_ovl_xino_warn(struct super_block *sb)
{
return KSU_OVL_FS(sb)->config.xino == KSU_OVL_XINO_ON;
}
/* All layers on same fs? */
static inline bool ksu_ovl_same_fs(struct super_block *sb)
{
return KSU_OVL_FS(sb)->xino_mode == 0;
}
/* All overlay inodes have same st_dev? */
static inline bool ksu_ovl_same_dev(struct super_block *sb)
{
return KSU_OVL_FS(sb)->xino_mode >= 0;
}
static inline unsigned int ksu_ovl_xino_bits(struct super_block *sb)
{
return ksu_ovl_same_dev(sb) ? KSU_OVL_FS(sb)->xino_mode : 0;
}
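/*
* Illustrative sketch (an assumption, mirroring upstream overlayfs): with
* xino enabled and xino_mode == n > 0, the layer's fsid is encoded in the
* high n bits of a 64-bit st_ino, roughly:
*
*	st_ino = real_ino | ((u64)fsid << (64 - n));
*
* so all layers share one st_dev while d_ino stays consistent with st_ino.
*/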
static inline void ksu_ovl_inode_lock(struct inode *inode)
{
mutex_lock(&KSU_OVL_I(inode)->lock);
}
static inline int ksu_ovl_inode_lock_interruptible(struct inode *inode)
{
return mutex_lock_interruptible(&KSU_OVL_I(inode)->lock);
}
static inline void ksu_ovl_inode_unlock(struct inode *inode)
{
mutex_unlock(&KSU_OVL_I(inode)->lock);
}
/* namei.c */
int ksu_ovl_check_fb_len(struct ksu_ovl_fb *fb, int fb_len);
static inline int ksu_ovl_check_fh_len(struct ksu_ovl_fh *fh, int fh_len)
{
if (fh_len < sizeof(struct ksu_ovl_fh))
return -EINVAL;
return ksu_ovl_check_fb_len(&fh->fb, fh_len - KSU_OVL_FH_WIRE_OFFSET);
}
struct dentry *ksu_ovl_decode_real_fh(struct ksu_ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ksu_ovl_check_origin_fh(struct ksu_ovl_fs *ofs, struct ksu_ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ksu_ovl_path **stackp);
int ksu_ovl_verify_set_fh(struct ksu_ovl_fs *ofs, struct dentry *dentry,
enum ksu_ovl_xattr ox, struct dentry *real, bool is_upper,
bool set);
struct dentry *ksu_ovl_index_upper(struct ksu_ovl_fs *ofs, struct dentry *index);
int ksu_ovl_verify_index(struct ksu_ovl_fs *ofs, struct dentry *index);
int ksu_ovl_get_index_name(struct dentry *origin, struct qstr *name);
struct dentry *ksu_ovl_get_index_fh(struct ksu_ovl_fs *ofs, struct ksu_ovl_fh *fh);
struct dentry *ksu_ovl_lookup_index(struct ksu_ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
int ksu_ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ksu_ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ksu_ovl_lower_positive(struct dentry *dentry);
static inline int ksu_ovl_verify_origin(struct ksu_ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool set)
{
return ksu_ovl_verify_set_fh(ofs, upper, KSU_OVL_XATTR_ORIGIN, origin,
false, set);
}
static inline int ksu_ovl_verify_upper(struct ksu_ovl_fs *ofs, struct dentry *index,
struct dentry *upper, bool set)
{
return ksu_ovl_verify_set_fh(ofs, index, KSU_OVL_XATTR_UPPER, upper, true, set);
}
/* readdir.c */
extern const struct file_operations ksu_ovl_dir_operations;
struct file *ksu_ovl_dir_real_file(const struct file *file, bool want_upper);
int ksu_ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ksu_ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ksu_ovl_cache_free(struct list_head *list);
void ksu_ovl_dir_cache_free(struct inode *inode);
int ksu_ovl_check_d_type_supported(struct path *realpath);
int ksu_ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
int ksu_ovl_indexdir_cleanup(struct ksu_ovl_fs *ofs);
/*
* Can we iterate real dir directly?
*
* Non-merge dir may contain whiteouts from a time it was a merge upper, before
* lower dir was removed under it and possibly before it was rotated from upper
* to lower layer.
*/
static inline bool ksu_ovl_dir_is_real(struct dentry *dir)
{
return !ksu_ovl_test_flag(KSU_OVL_WHITEOUTS, d_inode(dir));
}
/* inode.c */
int ksu_ovl_set_nlink_upper(struct dentry *dentry);
int ksu_ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ksu_ovl_get_nlink(struct ksu_ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
int ksu_ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ksu_ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int ksu_ovl_permission(struct inode *inode, int mask);
int ksu_ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ksu_ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size, int flags);
ssize_t ksu_ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ksu_ovl_get_acl(struct inode *inode, int type);
int ksu_ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
bool ksu_ovl_is_private_xattr(struct super_block *sb, const char *name);
struct ksu_ovl_inode_params {
struct inode *newinode;
struct dentry *upperdentry;
struct ksu_ovl_path *lowerpath;
bool index;
unsigned int numlower;
char *redirect;
struct dentry *lowerdata;
};
void ksu_ovl_inode_init(struct inode *inode, struct ksu_ovl_inode_params *oip,
unsigned long ino, int fsid);
struct inode *ksu_ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ksu_ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper);
bool ksu_ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ksu_ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ksu_ovl_get_inode(struct super_block *sb,
struct ksu_ovl_inode_params *oip);
static inline void ksu_ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
to->i_gid = from->i_gid;
to->i_mode = from->i_mode;
to->i_atime = from->i_atime;
to->i_mtime = from->i_mtime;
to->i_ctime = from->i_ctime;
i_size_write(to, i_size_read(from));
}
static inline void ksu_ovl_copyflags(struct inode *from, struct inode *to)
{
unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
inode_set_flags(to, from->i_flags & mask, mask);
}
/* dir.c */
extern const struct inode_operations ksu_ovl_dir_inode_operations;
int ksu_ovl_cleanup_and_whiteout(struct ksu_ovl_fs *ofs, struct inode *dir,
struct dentry *dentry);
struct ksu_ovl_cattr {
dev_t rdev;
umode_t mode;
const char *link;
struct dentry *hardlink;
};
#define KSU_OVL_CATTR(m) (&(struct ksu_ovl_cattr) { .mode = (m) })
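/*
* Example usage (illustrative): create a 0755 directory with
*
*	newdentry = ksu_ovl_create_real(dir, newdentry,
*					KSU_OVL_CATTR(S_IFDIR | 0755));
*/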
int ksu_ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode);
struct dentry *ksu_ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct ksu_ovl_cattr *attr);
int ksu_ovl_cleanup(struct inode *dir, struct dentry *dentry);
struct dentry *ksu_ovl_lookup_temp(struct dentry *workdir);
struct dentry *ksu_ovl_create_temp(struct dentry *workdir, struct ksu_ovl_cattr *attr);
/* file.c */
extern const struct file_operations ksu_ovl_file_operations;
int __init ksu_ovl_aio_request_cache_init(void);
void ksu_ovl_aio_request_cache_destroy(void);
long ksu_ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long ksu_ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* copy_up.c */
int ksu_ovl_copy_up(struct dentry *dentry);
int ksu_ovl_copy_up_with_data(struct dentry *dentry);
int ksu_ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ksu_ovl_copy_xattr(struct super_block *sb, struct dentry *old,
struct dentry *new);
int ksu_ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ksu_ovl_fh *ksu_ovl_encode_real_fh(struct dentry *real, bool is_upper);
int ksu_ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper);
/* export.c */
extern const struct export_operations ksu_ovl_export_operations;

1232
kernel/ksufs/5.10/readdir.c Normal file

File diff suppressed because it is too large

2139
kernel/ksufs/5.10/super.c Normal file

File diff suppressed because it is too large

974
kernel/ksufs/5.10/util.c Normal file

@@ -0,0 +1,974 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
*/
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
int ksu_ovl_want_write(struct dentry *dentry)
{
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
return mnt_want_write(ksu_ovl_upper_mnt(ofs));
}
void ksu_ovl_drop_write(struct dentry *dentry)
{
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
mnt_drop_write(ksu_ovl_upper_mnt(ofs));
}
struct dentry *ksu_ovl_workdir(struct dentry *dentry)
{
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
return ofs->workdir;
}
const struct cred *ksu_ovl_override_creds(struct super_block *sb)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
if (!ofs->config.override_creds)
return NULL;
return override_creds(ofs->creator_cred);
}
void ksu_ovl_revert_creds(struct super_block *sb, const struct cred *old_cred)
{
if (old_cred)
revert_creds(old_cred);
}
/*
* Check if underlying fs supports file handles and try to determine encoding
* type, in order to deduce maximum inode number used by fs.
*
* Return 0 if file handles are not supported.
* Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
* Return -1 if fs uses a non default encoding with unknown inode size.
*/
int ksu_ovl_can_decode_fh(struct super_block *sb)
{
if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
return 0;
return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
}
struct dentry *ksu_ovl_indexdir(struct super_block *sb)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
return ofs->indexdir;
}
/* Index all files on copy up. For now only enabled for NFS export */
bool ksu_ovl_index_all(struct super_block *sb)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
return ofs->config.nfs_export && ofs->config.index;
}
/* Verify lower origin on lookup. For now only enabled for NFS export */
bool ksu_ovl_verify_lower(struct super_block *sb)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
return ofs->config.nfs_export && ofs->config.index;
}
struct ksu_ovl_entry *ksu_ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ksu_ovl_entry, lowerstack[numlower]);
struct ksu_ovl_entry *oe = kzalloc(size, GFP_KERNEL);
if (oe)
oe->numlower = numlower;
return oe;
}
bool ksu_ovl_dentry_remote(struct dentry *dentry)
{
return dentry->d_flags &
(DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
}
void ksu_ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
unsigned int mask)
{
struct ksu_ovl_entry *oe = KSU_OVL_E(dentry);
unsigned int i, flags = 0;
if (upperdentry)
flags |= upperdentry->d_flags;
for (i = 0; i < oe->numlower; i++)
flags |= oe->lowerstack[i].dentry->d_flags;
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~mask;
dentry->d_flags |= flags & mask;
spin_unlock(&dentry->d_lock);
}
bool ksu_ovl_dentry_weird(struct dentry *dentry)
{
return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
DCACHE_MANAGE_TRANSIT |
DCACHE_OP_HASH |
DCACHE_OP_COMPARE);
}
enum ksu_ovl_path_type ksu_ovl_path_type(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
enum ksu_ovl_path_type type = 0;
if (ksu_ovl_dentry_upper(dentry)) {
type = __KSU_OVL_PATH_UPPER;
/*
* Non-dir dentry can hold lower dentry of its copy up origin.
*/
if (oe->numlower) {
if (ksu_ovl_test_flag(KSU_OVL_CONST_INO, d_inode(dentry)))
type |= __KSU_OVL_PATH_ORIGIN;
if (d_is_dir(dentry) ||
!ksu_ovl_has_upperdata(d_inode(dentry)))
type |= __KSU_OVL_PATH_MERGE;
}
} else {
if (oe->numlower > 1)
type |= __KSU_OVL_PATH_MERGE;
}
return type;
}
void ksu_ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
path->mnt = ksu_ovl_upper_mnt(ofs);
path->dentry = ksu_ovl_dentry_upper(dentry);
}
void ksu_ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
if (oe->numlower) {
path->mnt = oe->lowerstack[0].layer->mnt;
path->dentry = oe->lowerstack[0].dentry;
} else {
*path = (struct path) { };
}
}
void ksu_ovl_path_lowerdata(struct dentry *dentry, struct path *path)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
if (oe->numlower) {
path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
} else {
*path = (struct path) { };
}
}
enum ksu_ovl_path_type ksu_ovl_path_real(struct dentry *dentry, struct path *path)
{
enum ksu_ovl_path_type type = ksu_ovl_path_type(dentry);
if (!KSU_OVL_TYPE_UPPER(type))
ksu_ovl_path_lower(dentry, path);
else
ksu_ovl_path_upper(dentry, path);
return type;
}
struct dentry *ksu_ovl_dentry_upper(struct dentry *dentry)
{
return ksu_ovl_upperdentry_dereference(KSU_OVL_I(d_inode(dentry)));
}
struct dentry *ksu_ovl_dentry_lower(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
const struct ksu_ovl_layer *ksu_ovl_layer_lower(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
return oe->numlower ? oe->lowerstack[0].layer : NULL;
}
/*
* ksu_ovl_dentry_lower() could return either a data dentry or a metacopy
* dentry depending on what is stored in lowerstack[0]. At times we need to
* find the lower dentry which has the data (and not a metacopy dentry).
* This helper returns the lower data dentry.
*/
struct dentry *ksu_ovl_dentry_lowerdata(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
}
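/*
* Example (illustrative): for a metacopy file, lowerstack[0] holds the
* metacopy dentry (metadata only) while lowerstack[numlower - 1] holds
* the dentry with the real file data.
*/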
struct dentry *ksu_ovl_dentry_real(struct dentry *dentry)
{
return ksu_ovl_dentry_upper(dentry) ?: ksu_ovl_dentry_lower(dentry);
}
struct dentry *ksu_ovl_i_dentry_upper(struct inode *inode)
{
return ksu_ovl_upperdentry_dereference(KSU_OVL_I(inode));
}
struct inode *ksu_ovl_inode_upper(struct inode *inode)
{
struct dentry *upperdentry = ksu_ovl_i_dentry_upper(inode);
return upperdentry ? d_inode(upperdentry) : NULL;
}
struct inode *ksu_ovl_inode_lower(struct inode *inode)
{
return KSU_OVL_I(inode)->lower;
}
struct inode *ksu_ovl_inode_real(struct inode *inode)
{
return ksu_ovl_inode_upper(inode) ?: ksu_ovl_inode_lower(inode);
}
/* Return inode which contains lower data. Do not return metacopy */
struct inode *ksu_ovl_inode_lowerdata(struct inode *inode)
{
if (WARN_ON(!S_ISREG(inode->i_mode)))
return NULL;
return KSU_OVL_I(inode)->lowerdata ?: ksu_ovl_inode_lower(inode);
}
/* Return real inode which contains data. Does not return metacopy inode */
struct inode *ksu_ovl_inode_realdata(struct inode *inode)
{
struct inode *upperinode;
upperinode = ksu_ovl_inode_upper(inode);
if (upperinode && ksu_ovl_has_upperdata(inode))
return upperinode;
return ksu_ovl_inode_lowerdata(inode);
}
struct ksu_ovl_dir_cache *ksu_ovl_dir_cache(struct inode *inode)
{
return KSU_OVL_I(inode)->cache;
}
void ksu_ovl_set_dir_cache(struct inode *inode, struct ksu_ovl_dir_cache *cache)
{
KSU_OVL_I(inode)->cache = cache;
}
void ksu_ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
{
set_bit(flag, &KSU_OVL_E(dentry)->flags);
}
void ksu_ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
{
clear_bit(flag, &KSU_OVL_E(dentry)->flags);
}
bool ksu_ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
{
return test_bit(flag, &KSU_OVL_E(dentry)->flags);
}
bool ksu_ovl_dentry_is_opaque(struct dentry *dentry)
{
return ksu_ovl_dentry_test_flag(KSU_OVL_E_OPAQUE, dentry);
}
bool ksu_ovl_dentry_is_whiteout(struct dentry *dentry)
{
return !dentry->d_inode && ksu_ovl_dentry_is_opaque(dentry);
}
void ksu_ovl_dentry_set_opaque(struct dentry *dentry)
{
ksu_ovl_dentry_set_flag(KSU_OVL_E_OPAQUE, dentry);
}
/*
* For hard links and decoded file handles, it's possible for ksu_ovl_dentry_upper()
* to return positive, while there's no actual upper alias for the inode.
* Copy up code needs to know about the existence of the upper alias, so it
* can't use ksu_ovl_dentry_upper().
*/
bool ksu_ovl_dentry_has_upper_alias(struct dentry *dentry)
{
return ksu_ovl_dentry_test_flag(KSU_OVL_E_UPPER_ALIAS, dentry);
}
void ksu_ovl_dentry_set_upper_alias(struct dentry *dentry)
{
ksu_ovl_dentry_set_flag(KSU_OVL_E_UPPER_ALIAS, dentry);
}
static bool ksu_ovl_should_check_upperdata(struct inode *inode)
{
if (!S_ISREG(inode->i_mode))
return false;
if (!ksu_ovl_inode_lower(inode))
return false;
return true;
}
bool ksu_ovl_has_upperdata(struct inode *inode)
{
if (!ksu_ovl_should_check_upperdata(inode))
return true;
if (!ksu_ovl_test_flag(KSU_OVL_UPPERDATA, inode))
return false;
/*
* Pairs with smp_wmb() in ksu_ovl_set_upperdata(). Main user of
* ksu_ovl_has_upperdata() is ksu_ovl_copy_up_meta_inode_data(). Make sure
* if setting of KSU_OVL_UPPERDATA is visible, then effects of writes
* before that are visible too.
*/
smp_rmb();
return true;
}
void ksu_ovl_set_upperdata(struct inode *inode)
{
/*
* Pairs with smp_rmb() in ksu_ovl_has_upperdata(). Make sure
* if KSU_OVL_UPPERDATA flag is visible, then effects of write operations
* before it are visible as well.
*/
smp_wmb();
ksu_ovl_set_flag(KSU_OVL_UPPERDATA, inode);
}
/* Caller should hold ksu_ovl_inode->lock */
bool ksu_ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
{
if (!ksu_ovl_open_flags_need_copy_up(flags))
return false;
return !ksu_ovl_test_flag(KSU_OVL_UPPERDATA, d_inode(dentry));
}
bool ksu_ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
{
if (!ksu_ovl_open_flags_need_copy_up(flags))
return false;
return !ksu_ovl_has_upperdata(d_inode(dentry));
}
bool ksu_ovl_redirect_dir(struct super_block *sb)
{
struct ksu_ovl_fs *ofs = sb->s_fs_info;
return ofs->config.redirect_dir && !ofs->noxattr;
}
const char *ksu_ovl_dentry_get_redirect(struct dentry *dentry)
{
return KSU_OVL_I(d_inode(dentry))->redirect;
}
void ksu_ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
{
struct ksu_ovl_inode *oi = KSU_OVL_I(d_inode(dentry));
kfree(oi->redirect);
oi->redirect = redirect;
}
void ksu_ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
struct inode *upperinode = d_inode(upperdentry);
WARN_ON(KSU_OVL_I(inode)->__upperdentry);
/*
* Make sure upperdentry is consistent before making it visible
*/
smp_wmb();
KSU_OVL_I(inode)->__upperdentry = upperdentry;
if (inode_unhashed(inode)) {
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
}
static void ksu_ovl_dir_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
WARN_ON(!d_is_dir(dentry));
/*
* Version is used by readdir code to keep cache consistent.
* For merge dirs (or dirs with origin) all changes need to be noted.
* For non-merge dirs, cache contains only impure entries (i.e. ones
* which have been copied up and have origins), so only need to note
* changes to impure entries.
*/
if (!ksu_ovl_dir_is_real(dentry) || impurity)
KSU_OVL_I(inode)->version++;
}
void ksu_ovl_dir_modified(struct dentry *dentry, bool impurity)
{
/* Copy mtime/ctime */
ksu_ovl_copyattr(d_inode(ksu_ovl_dentry_upper(dentry)), d_inode(dentry));
ksu_ovl_dir_version_inc(dentry, impurity);
}
u64 ksu_ovl_dentry_version_get(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
return KSU_OVL_I(inode)->version;
}
bool ksu_ovl_is_whiteout(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
return inode && IS_WHITEOUT(inode);
}
struct file *ksu_ovl_path_open(struct path *path, int flags)
{
struct inode *inode = d_inode(path->dentry);
int err, acc_mode;
if (flags & ~(O_ACCMODE | O_LARGEFILE))
BUG();
switch (flags & O_ACCMODE) {
case O_RDONLY:
acc_mode = MAY_READ;
break;
case O_WRONLY:
acc_mode = MAY_WRITE;
break;
default:
BUG();
}
err = inode_permission(inode, acc_mode | MAY_OPEN);
if (err)
return ERR_PTR(err);
/* O_NOATIME is an optimization, don't fail if not permitted */
if (inode_owner_or_capable(inode))
flags |= O_NOATIME;
return dentry_open(path, flags, current_cred());
}
/* Caller should hold ksu_ovl_inode->lock */
static bool ksu_ovl_already_copied_up_locked(struct dentry *dentry, int flags)
{
bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
if (ksu_ovl_dentry_upper(dentry) &&
(ksu_ovl_dentry_has_upper_alias(dentry) || disconnected) &&
!ksu_ovl_dentry_needs_data_copy_up_locked(dentry, flags))
return true;
return false;
}
bool ksu_ovl_already_copied_up(struct dentry *dentry, int flags)
{
bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
/*
* Check if copy-up has happened, and if an upper alias exists (in the
* case of hard links).
*
* Both checks are lockless:
* - false negatives: will recheck under oi->lock
* - false positives:
* + ksu_ovl_dentry_upper() uses memory barriers to ensure the
* upper dentry is up-to-date
* + ksu_ovl_dentry_has_upper_alias() relies on locking of
* upper parent i_rwsem to prevent reordering copy-up
* with rename.
*/
if (ksu_ovl_dentry_upper(dentry) &&
(ksu_ovl_dentry_has_upper_alias(dentry) || disconnected) &&
!ksu_ovl_dentry_needs_data_copy_up(dentry, flags))
return true;
return false;
}
int ksu_ovl_copy_up_start(struct dentry *dentry, int flags)
{
struct inode *inode = d_inode(dentry);
int err;
err = ksu_ovl_inode_lock_interruptible(inode);
if (!err && ksu_ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
ksu_ovl_inode_unlock(inode);
}
return err;
}
void ksu_ovl_copy_up_end(struct dentry *dentry)
{
ksu_ovl_inode_unlock(d_inode(dentry));
}
bool ksu_ovl_check_origin_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry)
{
ssize_t res;
res = ksu_ovl_do_getxattr(ofs, dentry, KSU_OVL_XATTR_ORIGIN, NULL, 0);
/* Zero size value means "copied up but origin unknown" */
if (res >= 0)
return true;
return false;
}
bool ksu_ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
enum ksu_ovl_xattr ox)
{
ssize_t res;
char val;
if (!d_is_dir(dentry))
return false;
res = ksu_ovl_do_getxattr(KSU_OVL_FS(sb), dentry, ox, &val, 1);
if (res == 1 && val == 'y')
return true;
return false;
}
#define KSU_OVL_XATTR_OPAQUE_POSTFIX "opaque"
#define KSU_OVL_XATTR_REDIRECT_POSTFIX "redirect"
#define KSU_OVL_XATTR_ORIGIN_POSTFIX "origin"
#define KSU_OVL_XATTR_IMPURE_POSTFIX "impure"
#define KSU_OVL_XATTR_NLINK_POSTFIX "nlink"
#define KSU_OVL_XATTR_UPPER_POSTFIX "upper"
#define KSU_OVL_XATTR_METACOPY_POSTFIX "metacopy"
#define KSU_OVL_XATTR_TAB_ENTRY(x) \
[x] = KSU_OVL_XATTR_PREFIX x ## _POSTFIX
const char *ksu_ovl_xattr_table[] = {
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_OPAQUE),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_REDIRECT),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_ORIGIN),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_IMPURE),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_NLINK),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_UPPER),
KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_METACOPY),
};
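/*
* For example, KSU_OVL_XATTR_TAB_ENTRY(KSU_OVL_XATTR_OPAQUE) expands to
* [KSU_OVL_XATTR_OPAQUE] = KSU_OVL_XATTR_PREFIX "opaque", i.e. the
* "opaque" postfix pasted onto the overlay xattr prefix macro (defined
* in the header).
*/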
int ksu_ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
enum ksu_ovl_xattr ox, const void *value, size_t size,
int xerr)
{
int err;
struct ksu_ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (ofs->noxattr)
return xerr;
err = ksu_ovl_do_setxattr(ofs, upperdentry, ox, value, size);
if (err == -EOPNOTSUPP) {
pr_warn("cannot set %s xattr on upper\n", ksu_ovl_xattr(ofs, ox));
ofs->noxattr = true;
return xerr;
}
return err;
}
int ksu_ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
int err;
if (ksu_ovl_test_flag(KSU_OVL_IMPURE, d_inode(dentry)))
return 0;
/*
* Do not fail when upper doesn't support xattrs.
* Upper inodes won't have origin nor redirect xattr anyway.
*/
err = ksu_ovl_check_setxattr(dentry, upperdentry, KSU_OVL_XATTR_IMPURE,
"y", 1, 0);
if (!err)
ksu_ovl_set_flag(KSU_OVL_IMPURE, d_inode(dentry));
return err;
}
/**
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
bool ksu_ovl_inuse_trylock(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
bool locked = false;
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_OVL_INUSE)) {
inode->i_state |= I_OVL_INUSE;
locked = true;
}
spin_unlock(&inode->i_lock);
return locked;
}
void ksu_ovl_inuse_unlock(struct dentry *dentry)
{
if (dentry) {
struct inode *inode = d_inode(dentry);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_OVL_INUSE));
inode->i_state &= ~I_OVL_INUSE;
spin_unlock(&inode->i_lock);
}
}
bool ksu_ovl_is_inuse(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
bool inuse;
spin_lock(&inode->i_lock);
inuse = (inode->i_state & I_OVL_INUSE);
spin_unlock(&inode->i_lock);
return inuse;
}
/*
* Does this overlay dentry need to be indexed on copy up?
*/
bool ksu_ovl_need_index(struct dentry *dentry)
{
struct dentry *lower = ksu_ovl_dentry_lower(dentry);
if (!lower || !ksu_ovl_indexdir(dentry->d_sb))
return false;
/* Index all files for NFS export and consistency verification */
if (ksu_ovl_index_all(dentry->d_sb))
return true;
/* Index only lower hardlinks on copy up */
if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
return true;
return false;
}
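/*
* Example (illustrative): with the index enabled, a lower file with
* i_nlink == 2 is indexed on copy up, so both overlay hardlinks end up
* mapped to the same upper inode.
*/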
/* Caller must hold KSU_OVL_I(inode)->lock */
static void ksu_ovl_cleanup_index(struct dentry *dentry)
{
struct dentry *indexdir = ksu_ovl_indexdir(dentry->d_sb);
struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ksu_ovl_dentry_lower(dentry);
struct dentry *upperdentry = ksu_ovl_dentry_upper(dentry);
struct dentry *index = NULL;
struct inode *inode;
struct qstr name = { };
int err;
err = ksu_ovl_get_index_name(lowerdentry, &name);
if (err)
goto fail;
inode = d_inode(upperdentry);
if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
* We either have a bug with persistent union nlink or a lower
* hardlink was added while overlay is mounted. Adding a lower
* hardlink and then unlinking all overlay hardlinks would drop
* overlay nlink to zero before all upper inodes are unlinked.
* As a safety measure, when that situation is detected, set
* the overlay nlink to the index inode nlink minus one for the
* index entry itself.
*/
set_nlink(d_inode(dentry), inode->i_nlink - 1);
ksu_ovl_set_nlink_upper(dentry);
goto out;
}
inode_lock_nested(dir, I_MUTEX_PARENT);
index = lookup_one_len(name.name, indexdir, name.len);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
} else if (ksu_ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
err = ksu_ovl_cleanup_and_whiteout(KSU_OVL_FS(dentry->d_sb),
dir, index);
} else {
/* Cleanup orphan index entries */
err = ksu_ovl_cleanup(dir, index);
}
inode_unlock(dir);
if (err)
goto fail;
out:
kfree(name.name);
dput(index);
return;
fail:
pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
goto out;
}
/*
* Operations that change overlay inode and upper inode nlink need to be
* synchronized with copy up for persistent nlink accounting.
*/
int ksu_ovl_nlink_start(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
const struct cred *old_cred;
int err;
if (WARN_ON(!inode))
return -ENOENT;
/*
* With the inodes index enabled, we store the union overlay nlink
* in an xattr on the index inode. When whiting out an indexed lower,
* we need to decrement the overlay persistent nlink, but before the
* first copy up, we have no upper index inode to store the xattr.
*
* As a workaround, before whiteout/rename over an indexed lower,
* copy up to create the upper index. Creating the upper index will
* initialize the overlay nlink, so it could be dropped if unlink
* or rename succeeds.
*
* TODO: implement metadata only index copy up when called with
* ksu_ovl_copy_up_flags(dentry, O_PATH).
*/
if (ksu_ovl_need_index(dentry) && !ksu_ovl_dentry_has_upper_alias(dentry)) {
err = ksu_ovl_copy_up(dentry);
if (err)
return err;
}
err = ksu_ovl_inode_lock_interruptible(inode);
if (err)
return err;
if (d_is_dir(dentry) || !ksu_ovl_test_flag(KSU_OVL_INDEX, inode))
goto out;
old_cred = ksu_ovl_override_creds(dentry->d_sb);
/*
* The overlay inode nlink should be incremented/decremented IFF the
* upper operation succeeds, along with nlink change of upper inode.
* Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in an upper inode xattr.
*/
err = ksu_ovl_set_nlink_upper(dentry);
ksu_ovl_revert_creds(dentry->d_sb, old_cred);
out:
if (err)
ksu_ovl_inode_unlock(inode);
return err;
}
void ksu_ovl_nlink_end(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (ksu_ovl_test_flag(KSU_OVL_INDEX, inode) && inode->i_nlink == 0) {
const struct cred *old_cred;
old_cred = ksu_ovl_override_creds(dentry->d_sb);
ksu_ovl_cleanup_index(dentry);
ksu_ovl_revert_creds(dentry->d_sb, old_cred);
}
ksu_ovl_inode_unlock(inode);
}
int ksu_ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
{
/* Workdir should not be the same as upperdir */
if (workdir == upperdir)
goto err;
/* Workdir should not be subdir of upperdir and vice versa */
if (lock_rename(workdir, upperdir) != NULL)
goto err_unlock;
return 0;
err_unlock:
unlock_rename(workdir, upperdir);
err:
pr_err("failed to lock workdir+upperdir\n");
return -EIO;
}
/* Returns err < 0 on failure, 0 if no metacopy xattr, 1 if metacopy xattr found */
int ksu_ovl_check_metacopy_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry)
{
ssize_t res;
/* Only regular files can have metacopy xattr */
if (!S_ISREG(d_inode(dentry)->i_mode))
return 0;
res = ksu_ovl_do_getxattr(ofs, dentry, KSU_OVL_XATTR_METACOPY, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return 0;
goto out;
}
return 1;
out:
pr_warn_ratelimited("failed to get metacopy (%zi)\n", res);
return res;
}
bool ksu_ovl_is_metacopy_dentry(struct dentry *dentry)
{
struct ksu_ovl_entry *oe = dentry->d_fsdata;
if (!d_is_reg(dentry))
return false;
if (ksu_ovl_dentry_upper(dentry)) {
if (!ksu_ovl_has_upperdata(d_inode(dentry)))
return true;
return false;
}
return (oe->numlower > 1);
}
char *ksu_ovl_get_redirect_xattr(struct ksu_ovl_fs *ofs, struct dentry *dentry,
int padding)
{
int res;
char *s, *next, *buf = NULL;
res = ksu_ovl_do_getxattr(ofs, dentry, KSU_OVL_XATTR_REDIRECT, NULL, 0);
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
if (res < 0)
goto fail;
if (res == 0)
goto invalid;
buf = kzalloc(res + padding + 1, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);
res = ksu_ovl_do_getxattr(ofs, dentry, KSU_OVL_XATTR_REDIRECT, buf, res);
if (res < 0)
goto fail;
if (res == 0)
goto invalid;
if (buf[0] == '/') {
for (s = buf; *s++ == '/'; s = next) {
next = strchrnul(s, '/');
if (s == next)
goto invalid;
}
} else {
if (strchr(buf, '/') != NULL)
goto invalid;
}
return buf;
invalid:
pr_warn_ratelimited("invalid redirect (%s)\n", buf);
res = -EINVAL;
goto err_free;
fail:
pr_warn_ratelimited("failed to get redirect (%i)\n", res);
err_free:
kfree(buf);
return ERR_PTR(res);
}
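/*
* Illustrative outcomes of the validation above: "/a/b" and "name" are
* accepted; "" (empty), "//a", "/a//b" and "a/b" (a relative name
* containing '/') are all rejected as invalid redirects.
*/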
/*
* ksu_ovl_sync_status() - Check fs sync status for volatile mounts
*
* Returns 1 if this is not a volatile mount and a real sync is required.
*
* Returns 0 if syncing can be skipped because mount is volatile, and no errors
* have occurred on the upperdir since the mount.
*
* Returns -errno if it is a volatile mount, and the error that occurred since
* the last mount. If the error code changes, it'll return the latest error
* code.
*/
int ksu_ovl_sync_status(struct ksu_ovl_fs *ofs)
{
struct vfsmount *mnt;
if (ksu_ovl_should_sync(ofs))
return 1;
mnt = ksu_ovl_upper_mnt(ofs);
if (!mnt)
return 0;
return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
}

1
kernel/ksufs/Makefile Normal file

@@ -0,0 +1 @@
obj-y += 5.10/

60
kernel/ksufs/README.md Normal file

@@ -0,0 +1,60 @@
# KSUFS
Based on overlayfs. The steps below document how these sources were derived from the in-tree 5.10 overlayfs.
## Step1
Rename all overlayfs identifiers:
```bash
sed -i 's/ovl_/ksu_ovl_/g' *
sed -i 's/OVL_/KSU_OVL_/g' *
mv ovl_entry.h ksu_ovl_entry.h
sed -i 's/I_KSU_OVL_INUSE/I_OVL_INUSE/g' *
```
The last sed reverts the rename for `I_OVL_INUSE`, a core VFS inode-state flag defined in `include/linux/fs.h` that must keep its original name.
## Step2
In overlayfs.h, change the log prefix:
```c
// before
#define pr_fmt(fmt) "overlayfs: " fmt
// after
#define pr_fmt(fmt) "ksufs: " fmt
```
## Step3
Change the registered filesystem type and module alias from `overlay` to `ksufs`:
```c
static struct file_system_type ksu_ovl_fs_type = {
.owner = THIS_MODULE,
.name = "overlay",
.mount = ksu_ovl_mount,
.kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("overlay");
```
```c
static struct file_system_type ksu_ovl_fs_type = {
.owner = THIS_MODULE,
.name = "ksufs",
.mount = ksu_ovl_mount,
.kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("ksufs");
```
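With the filesystem registered as `ksufs`, mounting works like a regular overlayfs mount (a sketch; the option set is assumed to match upstream overlayfs, and the paths below are placeholders):
```bash
mount -t ksufs -o lowerdir=/lower,upperdir=/upper,workdir=/work none /mnt
```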
## Step4
In ksu_ovl_getattr, change the branch condition:
```c
// before
if (err)
// after
if (true)
```
## Step5
Makefile:
```makefile
obj-y += ksufs.o
ksufs-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
		copy_up.o export.o
```