diff --git a/etc/syscalls_linux_aarch64.md b/etc/syscalls_linux_aarch64.md index c50e003..a820752 100644 --- a/etc/syscalls_linux_aarch64.md +++ b/etc/syscalls_linux_aarch64.md @@ -90,8 +90,8 @@ | 0x57 (87) | timerfd_gettime | (int ufd, struct __kernel_itimerspec *otmr) | __arm64_sys_timerfd_gettime | false | | 0x58 (88) | utimensat | (int dfd, const char *filename, struct __kernel_timespec *utimes, int flags) | __arm64_sys_utimensat | true | | 0x59 (89) | acct | (const char *name) | __arm64_sys_acct | false | -| 0x5a (90) | capget | (cap_user_header_t header, cap_user_data_t dataptr) | __arm64_sys_capget | false | -| 0x5b (91) | capset | (cap_user_header_t header, const cap_user_data_t data) | __arm64_sys_capset | false | +| 0x5a (90) | capget | (cap_user_header_t header, cap_user_data_t dataptr) | __arm64_sys_capget | true | +| 0x5b (91) | capset | (cap_user_header_t header, const cap_user_data_t data) | __arm64_sys_capset | true | | 0x5c (92) | personality | (unsigned int personality) | __arm64_sys_arm64_personality | false | | 0x5d (93) | exit | (int error_code) | __arm64_sys_exit | true | | 0x5e (94) | exit_group | (int error_code) | __arm64_sys_exit_group | true | diff --git a/libkernel/src/fs/attr.rs b/libkernel/src/fs/attr.rs index 2019e8c..2e90f36 100644 --- a/libkernel/src/fs/attr.rs +++ b/libkernel/src/fs/attr.rs @@ -1,6 +1,9 @@ use crate::{ error::{KernelError, Result}, - proc::ids::{Gid, Uid}, + proc::{ + caps::{Capabilities, CapabilitiesFlags}, + ids::{Gid, Uid}, + }, }; use super::{FileType, InodeId}; @@ -88,8 +91,20 @@ impl FileAttr { /// # Arguments /// * `uid` - The user-ID that will be checked against this file's uid field. /// * `gid` - The group-ID that will be checked against this file's uid field. + /// * `caps` - The capabilities of the user. /// * `requested_mode` - A bitmask of `AccessMode` flags (`R_OK`, `W_OK`, `X_OK`) to check. - pub fn check_access(&self, uid: Uid, gid: Gid, requested_mode: AccessMode) -> Result<()> { + pub fn check_access( + &self, + uid: Uid, + gid: Gid, + caps: Capabilities, + requested_mode: AccessMode, + ) -> Result<()> { + // For filesystem related tasks, the CAP_DAC_OVERRIDE bypasses all permission checks. + if caps.is_capable(CapabilitiesFlags::CAP_DAC_OVERRIDE) { + return Ok(()); + } + // root (UID 0) bypasses most permission checks. For execute, at // least one execute bit must be set. if uid.is_root() { @@ -119,6 +134,7 @@ impl FileAttr { if requested_mode.contains(AccessMode::R_OK) && !perms_to_check.contains(FilePermissions::S_IRUSR) + && !caps.is_capable(CapabilitiesFlags::CAP_DAC_READ_SEARCH) { return Err(KernelError::NotPermitted); } @@ -129,6 +145,8 @@ impl FileAttr { } if requested_mode.contains(AccessMode::X_OK) && !perms_to_check.contains(FilePermissions::S_IXUSR) + && (self.file_type != FileType::Directory // CAP_DAC_READ_SEARCH allows directory search as well + || !caps.is_capable(CapabilitiesFlags::CAP_DAC_READ_SEARCH)) { return Err(KernelError::NotPermitted); } @@ -164,8 +182,13 @@ mod tests { fn root_can_read_without_perms() { let file = setup_file(FilePermissions::empty()); assert!( - file.check_access(ROOT_UID, ROOT_GID, AccessMode::R_OK) - .is_ok() + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::R_OK + ) + .is_ok() ); } @@ -173,15 +196,25 @@ mod tests { fn root_can_write_without_perms() { let file = setup_file(FilePermissions::empty()); assert!( - file.check_access(ROOT_UID, ROOT_GID, AccessMode::W_OK) - .is_ok() + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::W_OK + ) + .is_ok() ); } #[test] fn root_cannot_execute_if_no_exec_bits_are_set() { let file = setup_file(FilePermissions::S_IRUSR | FilePermissions::S_IWUSR); - let result = file.check_access(ROOT_UID, ROOT_GID, AccessMode::X_OK); + let result = file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::X_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -189,8 +222,13 @@ mod tests { fn root_can_execute_if_owner_exec_bit_is_set() { let file = setup_file(FilePermissions::S_IXUSR); assert!( - file.check_access(ROOT_UID, ROOT_GID, AccessMode::X_OK) - .is_ok() + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::X_OK + ) + .is_ok() ); } @@ -198,8 +236,13 @@ mod tests { fn root_can_execute_if_group_exec_bit_is_set() { let file = setup_file(FilePermissions::S_IXGRP); assert!( - file.check_access(ROOT_UID, ROOT_GID, AccessMode::X_OK) - .is_ok() + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::X_OK + ) + .is_ok() ); } @@ -207,8 +250,13 @@ mod tests { fn root_can_execute_if_other_exec_bit_is_set() { let file = setup_file(FilePermissions::S_IXOTH); assert!( - file.check_access(ROOT_UID, ROOT_GID, AccessMode::X_OK) - .is_ok() + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_empty(), + AccessMode::X_OK + ) + .is_ok() ); } @@ -216,15 +264,25 @@ mod tests { fn owner_can_read_when_permitted() { let file = setup_file(FilePermissions::S_IRUSR); assert!( - file.check_access(OWNER_UID, OWNER_GID, AccessMode::R_OK) - .is_ok() + file.check_access( + OWNER_UID, + OWNER_GID, + Capabilities::new_empty(), + AccessMode::R_OK + ) + .is_ok() ); } #[test] fn owner_cannot_read_when_denied() { let file = setup_file(FilePermissions::S_IWUSR | FilePermissions::S_IXUSR); - let result = file.check_access(OWNER_UID, OWNER_GID, AccessMode::R_OK); + let result = file.check_access( + OWNER_UID, + OWNER_GID, + Capabilities::new_empty(), + AccessMode::R_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -232,15 +290,25 @@ mod tests { fn owner_can_write_when_permitted() { let file = setup_file(FilePermissions::S_IWUSR); assert!( - file.check_access(OWNER_UID, OWNER_GID, AccessMode::W_OK) - .is_ok() + file.check_access( + OWNER_UID, + OWNER_GID, + Capabilities::new_empty(), + AccessMode::W_OK + ) + .is_ok() ); } #[test] fn owner_cannot_write_when_denied() { let file = setup_file(FilePermissions::S_IRUSR); - let result = file.check_access(OWNER_UID, OWNER_GID, AccessMode::W_OK); + let result = file.check_access( + OWNER_UID, + OWNER_GID, + Capabilities::new_empty(), + AccessMode::W_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -250,14 +318,17 @@ mod tests { FilePermissions::S_IRUSR | FilePermissions::S_IWUSR | FilePermissions::S_IXUSR, ); let mode = AccessMode::R_OK | AccessMode::W_OK | AccessMode::X_OK; - assert!(file.check_access(OWNER_UID, OWNER_GID, mode).is_ok()); + assert!( + file.check_access(OWNER_UID, OWNER_GID, Capabilities::new_empty(), mode) + .is_ok() + ); } #[test] fn owner_access_denied_if_one_of_many_perms_is_missing() { let file = setup_file(FilePermissions::S_IRUSR | FilePermissions::S_IXUSR); let mode = AccessMode::R_OK | AccessMode::W_OK | AccessMode::X_OK; // Requesting Write is denied - let result = file.check_access(OWNER_UID, OWNER_GID, mode); + let result = file.check_access(OWNER_UID, OWNER_GID, Capabilities::new_empty(), mode); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -265,22 +336,37 @@ mod tests { fn group_member_can_read_when_permitted() { let file = setup_file(FilePermissions::S_IRGRP); assert!( - file.check_access(GROUP_MEMBER_UID, FILE_GROUP_GID, AccessMode::R_OK) - .is_ok() + file.check_access( + GROUP_MEMBER_UID, + FILE_GROUP_GID, + Capabilities::new_empty(), + AccessMode::R_OK + ) + .is_ok() ); } #[test] fn group_member_cannot_write_when_owner_can() { let file = setup_file(FilePermissions::S_IWUSR | FilePermissions::S_IRGRP); - let result = file.check_access(GROUP_MEMBER_UID, FILE_GROUP_GID, AccessMode::W_OK); + let result = file.check_access( + GROUP_MEMBER_UID, + FILE_GROUP_GID, + Capabilities::new_empty(), + AccessMode::W_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } #[test] fn group_member_cannot_read_when_denied() { let file = setup_file(FilePermissions::S_IWGRP); - let result = file.check_access(GROUP_MEMBER_UID, FILE_GROUP_GID, AccessMode::R_OK); + let result = file.check_access( + GROUP_MEMBER_UID, + FILE_GROUP_GID, + Capabilities::new_empty(), + AccessMode::R_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -288,22 +374,37 @@ mod tests { fn other_can_execute_when_permitted() { let file = setup_file(FilePermissions::S_IXOTH); assert!( - file.check_access(OTHER_UID, OTHER_GID, AccessMode::X_OK) - .is_ok() + file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_empty(), + AccessMode::X_OK + ) + .is_ok() ); } #[test] fn other_cannot_read_when_only_owner_and_group_can() { let file = setup_file(FilePermissions::S_IRUSR | FilePermissions::S_IRGRP); - let result = file.check_access(OTHER_UID, OTHER_GID, AccessMode::R_OK); + let result = file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_empty(), + AccessMode::R_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } #[test] fn other_cannot_write_when_denied() { let file = setup_file(FilePermissions::S_IROTH); - let result = file.check_access(OTHER_UID, OTHER_GID, AccessMode::W_OK); + let result = file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_empty(), + AccessMode::W_OK, + ); assert!(matches!(result, Err(KernelError::NotPermitted))); } @@ -312,8 +413,13 @@ mod tests { // Checking for nothing should always succeed if the file exists. let file = setup_file(FilePermissions::empty()); assert!( - file.check_access(OTHER_UID, OTHER_GID, AccessMode::empty()) - .is_ok() + file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_empty(), + AccessMode::empty() + ) + .is_ok() ); } @@ -322,8 +428,66 @@ mod tests { let file = setup_file(FilePermissions::S_IROTH); // Only other can read // This user is not the owner and not in the file's group. assert!( - file.check_access(GROUP_MEMBER_UID, OTHER_GID, AccessMode::R_OK) - .is_ok() + file.check_access( + GROUP_MEMBER_UID, + OTHER_GID, + Capabilities::new_empty(), + AccessMode::R_OK + ) + .is_ok() ); } + + #[test] + fn cap_dac_override_can_read_write_exec_without_perms() { + let file = setup_file(FilePermissions::empty()); + let mode = AccessMode::R_OK | AccessMode::W_OK | AccessMode::X_OK; + assert!( + file.check_access( + ROOT_UID, + ROOT_GID, + Capabilities::new_cap(CapabilitiesFlags::CAP_DAC_OVERRIDE), + mode, + ) + .is_ok() + ); + } + + #[test] + fn cap_dac_read_search_can_read_without_perms() { + let file = setup_file(FilePermissions::empty()); + assert!( + file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_cap(CapabilitiesFlags::CAP_DAC_READ_SEARCH), + AccessMode::R_OK, + ) + .is_ok() + ); + } + + #[test] + fn cap_dac_read_search_cannot_write_without_perms() { + let file = setup_file(FilePermissions::empty()); + let result = file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_cap(CapabilitiesFlags::CAP_DAC_READ_SEARCH), + AccessMode::W_OK, + ); + assert!(matches!(result, Err(KernelError::NotPermitted))); + } + + #[test] + fn cap_dac_read_search_cannot_exec_without_perms() { + let file = setup_file(FilePermissions::empty()); + let result = file.check_access( + OTHER_UID, + OTHER_GID, + Capabilities::new_cap(CapabilitiesFlags::CAP_DAC_READ_SEARCH), + AccessMode::X_OK, + ); + assert!(matches!(result, Err(KernelError::NotPermitted))); + } } diff --git a/libkernel/src/proc/caps.rs b/libkernel/src/proc/caps.rs new file mode 100644 index 0000000..30d1d19 --- /dev/null +++ b/libkernel/src/proc/caps.rs @@ -0,0 +1,165 @@ +use crate::error::{KernelError, Result}; + +bitflags::bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct CapabilitiesFlags: u64 { + const CAP_CHOWN = 1 << 0; + const CAP_DAC_OVERRIDE = 1 << 1; + const CAP_DAC_READ_SEARCH = 1 << 2; + const CAP_FOWNER = 1 << 3; + const CAP_FSETID = 1 << 4; + const CAP_KILL = 1 << 5; + const CAP_SETGID = 1 << 6; + const CAP_SETUID = 1 << 7; + const CAP_SETPCAP = 1 << 8; + const CAP_LINUX_IMMUTABLE = 1 << 9; + const CAP_NET_BIND_SERVICE = 1 << 10; + const CAP_NET_BROADCAST = 1 << 11; + const CAP_NET_ADMIN = 1 << 12; + const CAP_NET_RAW = 1 << 13; + const CAP_IPC_LOCK = 1 << 14; + const CAP_IPC_OWNER = 1 << 15; + const CAP_SYS_MODULE = 1 << 16; + const CAP_SYS_RAWIO = 1 << 17; + const CAP_SYS_CHROOT = 1 << 18; + const CAP_SYS_PTRACE = 1 << 19; + const CAP_SYS_PACCT = 1 << 20; + const CAP_SYS_ADMIN = 1 << 21; + const CAP_SYS_BOOT = 1 << 22; + const CAP_SYS_NICE = 1 << 23; + const CAP_SYS_RESOURCE = 1 << 24; + const CAP_SYS_TIME = 1 << 25; + const CAP_SYS_TTY_CONFIG = 1 << 26; + const CAP_MKNOD = 1 << 27; + const CAP_LEASE = 1 << 28; + const CAP_AUDIT_WRITE = 1 << 29; + const CAP_AUDIT_CONTROL = 1 << 30; + const CAP_SETFCAP = 1 << 31; + const CAP_MAC_OVERRIDE = 1 << 32; + const CAP_MAC_ADMIN = 1 << 33; + const CAP_SYSLOG = 1 << 34; + const CAP_WAKE_ALARM = 1 << 35; + const CAP_BLOCK_SUSPEND = 1 << 36; + const CAP_AUDIT_READ = 1 << 37; + const CAP_PERFMON = 1 << 38; + const CAP_BPF = 1 << 39; + const CAP_CHECKPOINT_RESTORE = 1 << 40; + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Capabilities { + effective: CapabilitiesFlags, + permitted: CapabilitiesFlags, + inheritable: CapabilitiesFlags, + ambient: CapabilitiesFlags, + bounding: CapabilitiesFlags, +} + +impl Capabilities { + pub fn new( + effective: CapabilitiesFlags, + permitted: CapabilitiesFlags, + inheritable: CapabilitiesFlags, + ambient: CapabilitiesFlags, + bounding: CapabilitiesFlags, + ) -> Self { + Self { + effective, + permitted, + inheritable, + ambient, + bounding, + } + } + + pub fn new_root() -> Self { + Self { + effective: CapabilitiesFlags::all(), + permitted: CapabilitiesFlags::all(), + inheritable: CapabilitiesFlags::all(), + ambient: CapabilitiesFlags::all(), + bounding: CapabilitiesFlags::all(), + } + } + + pub fn new_empty() -> Self { + Self { + effective: CapabilitiesFlags::empty(), + permitted: CapabilitiesFlags::empty(), + inheritable: CapabilitiesFlags::empty(), + ambient: CapabilitiesFlags::empty(), + bounding: CapabilitiesFlags::empty(), + } + } + + /// Convenience method for creating capabilities with a single capability + pub fn new_cap(cap: CapabilitiesFlags) -> Self { + Self { + effective: cap, + permitted: cap, + inheritable: cap, + ambient: cap, + bounding: cap, + } + } + + /// Set the publicly mutable fields on capabilities + pub fn set_public( + &mut self, + caller_caps: Capabilities, + effective: CapabilitiesFlags, + permitted: CapabilitiesFlags, + inheritable: CapabilitiesFlags, + ) -> Result<()> { + // permitted should be a subset of self.permitted, and effective should be a subset of permitted + // inheritable should be a subset of self.bounding, or caller's effective should contain CAP_SETPCAP + if !self.permitted.contains(permitted) + || !permitted.contains(effective) + || (!self.bounding.contains(inheritable) + && !caller_caps + .effective + .contains(CapabilitiesFlags::CAP_SETPCAP)) + { + return Err(KernelError::NotPermitted); + } + self.effective = effective; + self.permitted = permitted; + self.inheritable = inheritable; + Ok(()) + } + + pub fn effective(&self) -> CapabilitiesFlags { + self.effective + } + + pub fn permitted(&self) -> CapabilitiesFlags { + self.permitted + } + + pub fn inheritable(&self) -> CapabilitiesFlags { + self.inheritable + } + + pub fn ambient(&self) -> CapabilitiesFlags { + self.ambient + } + + pub fn bounding(&self) -> CapabilitiesFlags { + self.bounding + } + + /// Checks if a capability is effective, as in if it can be used. + pub fn is_capable(&self, cap: CapabilitiesFlags) -> bool { + self.effective.contains(cap) + } + + /// Shortcut for returning EPERM if a capability is not effective. + pub fn check_capable(&self, cap: CapabilitiesFlags) -> Result<()> { + if !self.effective.contains(cap) { + Err(KernelError::NotPermitted) + } else { + Ok(()) + } + } +} diff --git a/libkernel/src/proc/mod.rs b/libkernel/src/proc/mod.rs index ff6b00d..d11c71f 100644 --- a/libkernel/src/proc/mod.rs +++ b/libkernel/src/proc/mod.rs @@ -1 +1,2 @@ +pub mod caps; pub mod ids; diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index 4c480da..fb32be0 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -40,6 +40,7 @@ use crate::{ mmap::{sys_mmap, sys_mprotect, sys_munmap}, }, process::{ + caps::{sys_capget, sys_capset}, clone::sys_clone, creds::{ sys_getegid, sys_geteuid, sys_getgid, sys_getresgid, sys_getresuid, sys_gettid, @@ -149,13 +150,13 @@ pub async fn handle_syscall() { sys_fchownat( arg1.into(), TUA::from_value(arg2 as _), - arg3.into(), - arg4.into(), + arg3 as _, + arg4 as _, arg5 as _, ) .await } - 0x37 => sys_fchown(arg1.into(), arg2.into(), arg3.into()).await, + 0x37 => sys_fchown(arg1.into(), arg2 as _, arg3 as _).await, 0x38 => { sys_openat( arg1.into(), @@ -270,6 +271,8 @@ pub async fn handle_syscall() { ) .await } + 0x5a => sys_capget(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await, + 0x5b => sys_capset(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await, 0x5d => sys_exit(arg1 as _).await, 0x5e => sys_exit_group(arg1 as _), 0x60 => sys_set_tid_address(TUA::from_value(arg1 as _)), diff --git a/src/clock/timespec.rs b/src/clock/timespec.rs index ef553b9..91eaa5a 100644 --- a/src/clock/timespec.rs +++ b/src/clock/timespec.rs @@ -10,8 +10,8 @@ use crate::memory::uaccess::{UserCopyable, copy_from_user}; #[repr(C)] #[derive(Clone, Copy, Debug)] pub struct TimeSpec { - tv_sec: i64, - tv_nsec: u64, + pub tv_sec: i64, + pub tv_nsec: u64, } unsafe impl UserCopyable for TimeSpec {} diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 4d7e391..ccd597d 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -9,6 +9,7 @@ use libkernel::error::{FsError, KernelError, Result}; use libkernel::fs::attr::FilePermissions; use libkernel::fs::path::Path; use libkernel::fs::{BlockDevice, FS_ID_START, FileType, Filesystem, Inode, InodeId, OpenFlags}; +use libkernel::proc::caps::CapabilitiesFlags; use open_file::OpenFile; use reg::RegFile; @@ -506,16 +507,30 @@ impl VFS { // Determine the parent directory inode in which to perform the unlink. let parent_inode = if let Some(parent_path) = path.parent() { - self.resolve_path(parent_path, root.clone(), task).await? + self.resolve_path(parent_path, root.clone(), task.clone()) + .await? } else { root.clone() }; + let parent_attr = parent_inode.getattr().await?; + // Ensure the parent really is a directory. - if parent_inode.getattr().await?.file_type != FileType::Directory { + if parent_attr.file_type != FileType::Directory { return Err(FsError::NotADirectory.into()); } + { + let creds = task.creds.lock_save_irq(); + + if attr.mode.contains(FilePermissions::S_ISVTX) + && attr.uid != creds.euid() + && parent_attr.uid != creds.euid() + { + creds.caps().check_capable(CapabilitiesFlags::CAP_FOWNER)?; + } + } + // Extract the final component (name) and perform the unlink on the parent. let name = path.file_name().ok_or(FsError::InvalidInput)?; diff --git a/src/fs/syscalls/at/access.rs b/src/fs/syscalls/at/access.rs index a6a6ee9..2bf8190 100644 --- a/src/fs/syscalls/at/access.rs +++ b/src/fs/syscalls/at/access.rs @@ -41,5 +41,7 @@ pub async fn sys_faccessat2(dirfd: Fd, path: TUA, mode: i32, flags: i32) (creds.uid(), creds.gid()) }; - attrs.check_access(uid, gid, access_mode).map(|_| 0) + attrs + .check_access(uid, gid, creds.caps(), access_mode) + .map(|_| 0) } diff --git a/src/fs/syscalls/at/chmod.rs b/src/fs/syscalls/at/chmod.rs index 4a32045..9509c6a 100644 --- a/src/fs/syscalls/at/chmod.rs +++ b/src/fs/syscalls/at/chmod.rs @@ -1,18 +1,25 @@ use core::ffi::c_char; use libkernel::{ - error::Result, + error::{KernelError, Result}, fs::{attr::FilePermissions, path::Path}, memory::address::TUA, + proc::{caps::CapabilitiesFlags, ids::Uid}, }; +use ringbuf::Arc; use crate::{ fs::syscalls::at::{AtFlags, resolve_at_start_node, resolve_path_flags}, memory::uaccess::cstr::UserCStr, - process::fd_table::Fd, + process::{Task, fd_table::Fd}, sched::current_task, }; +pub fn can_chmod(task: Arc, uid: Uid) -> bool { + let creds = task.creds.lock_save_irq(); + creds.caps().is_capable(CapabilitiesFlags::CAP_FOWNER) || creds.uid() == uid +} + pub async fn sys_fchmodat(dirfd: Fd, path: TUA, mode: u16, flags: i32) -> Result { let flags = AtFlags::from_bits_retain(flags); @@ -23,9 +30,13 @@ pub async fn sys_fchmodat(dirfd: Fd, path: TUA, mode: u16, flags: i32) - let start_node = resolve_at_start_node(dirfd, path).await?; let mode = FilePermissions::from_bits_retain(mode); - let node = resolve_path_flags(dirfd, path, start_node, task, flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; let mut attr = node.getattr().await?; + if !can_chmod(task, attr.uid) { + return Err(KernelError::NotPermitted); + } + attr.mode = mode; node.setattr(attr).await?; diff --git a/src/fs/syscalls/at/chown.rs b/src/fs/syscalls/at/chown.rs index a999a62..48ea936 100644 --- a/src/fs/syscalls/at/chown.rs +++ b/src/fs/syscalls/at/chown.rs @@ -4,7 +4,10 @@ use libkernel::{ error::Result, fs::path::Path, memory::address::TUA, - proc::ids::{Gid, Uid}, + proc::{ + caps::CapabilitiesFlags, + ids::{Gid, Uid}, + }, }; use crate::{ @@ -17,8 +20,8 @@ use crate::{ pub async fn sys_fchownat( dirfd: Fd, path: TUA, - owner: Uid, - group: Gid, + owner: i32, + group: i32, flags: i32, ) -> Result { let mut buf = [0; 1024]; @@ -28,11 +31,24 @@ pub async fn sys_fchownat( let start_node = resolve_at_start_node(dirfd, path).await?; let flags = AtFlags::from_bits_retain(flags); - let node = resolve_path_flags(dirfd, path, start_node, task, flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; let mut attr = node.getattr().await?; - attr.uid = owner; - attr.gid = group; + { + let creds = task.creds.lock_save_irq(); + if owner != -1 { + creds.caps().check_capable(CapabilitiesFlags::CAP_CHOWN)?; + attr.uid = Uid::new(owner as _); + } + if group != -1 { + let gid = Gid::new(group as _); + // doesnt seem like theres real groups so this is as good as it gets + if creds.uid() != attr.uid || creds.gid() != gid { + creds.caps().check_capable(CapabilitiesFlags::CAP_CHOWN)?; + } + attr.gid = gid; + } + } node.setattr(attr).await?; Ok(0) diff --git a/src/fs/syscalls/at/link.rs b/src/fs/syscalls/at/link.rs index 8482b76..644c8fc 100644 --- a/src/fs/syscalls/at/link.rs +++ b/src/fs/syscalls/at/link.rs @@ -4,6 +4,7 @@ use libkernel::{ error::{FsError, KernelError, Result}, fs::{FileType, path::Path}, memory::address::TUA, + proc::caps::CapabilitiesFlags, }; use crate::{ @@ -36,6 +37,16 @@ pub async fn sys_linkat( flags.insert(AtFlags::AT_SYMLINK_NOFOLLOW); } + if flags.contains(AtFlags::AT_EMPTY_PATH) + && !task + .creds + .lock_save_irq() + .caps() + .is_capable(CapabilitiesFlags::CAP_DAC_READ_SEARCH) + { + return Err(FsError::NotFound.into()); // weird error but thats what linkat(2) says + } + let old_path = Path::new( UserCStr::from_ptr(old_path) .copy_from_user(&mut buf) diff --git a/src/fs/syscalls/at/rename.rs b/src/fs/syscalls/at/rename.rs index fd82bad..4a0af92 100644 --- a/src/fs/syscalls/at/rename.rs +++ b/src/fs/syscalls/at/rename.rs @@ -2,8 +2,9 @@ use core::ffi::c_char; use libkernel::{ error::{FsError, KernelError, Result}, - fs::{FileType, path::Path}, + fs::{FileType, attr::FilePermissions, path::Path}, memory::address::TUA, + proc::caps::CapabilitiesFlags, }; use crate::{ @@ -87,6 +88,31 @@ pub async fn sys_renameat2( return Err(FsError::NotADirectory.into()); } + { + let old_parent_attr = old_parent_inode.getattr().await?; + let old_attr = old_parent_inode.lookup(old_name).await?.getattr().await?; + let new_parent_attr = new_parent_inode.getattr().await?; + let new_attr = match new_parent_inode.lookup(new_name).await { + Ok(attr) => Some(attr.getattr().await?), + Err(_) => None, + }; + + let creds = task.creds.lock_save_irq(); + + if (old_attr.mode.contains(FilePermissions::S_ISVTX) + && old_attr.uid != creds.euid() + && old_parent_attr.uid != creds.euid()) + || new_parent_attr.uid != creds.euid() + { + creds.caps().check_capable(CapabilitiesFlags::CAP_FOWNER)?; + } else if let Some(new_attr) = new_attr + && new_attr.mode.contains(FilePermissions::S_ISVTX) + && new_attr.uid != creds.euid() + { + creds.caps().check_capable(CapabilitiesFlags::CAP_FOWNER)?; + } + } + if exchange { VFS.exchange(old_parent_inode, old_name, new_parent_inode, new_name) .await?; diff --git a/src/fs/syscalls/at/utime.rs b/src/fs/syscalls/at/utime.rs index c0bb529..f063408 100644 --- a/src/fs/syscalls/at/utime.rs +++ b/src/fs/syscalls/at/utime.rs @@ -1,19 +1,27 @@ use core::ffi::c_char; use libkernel::{ - error::{KernelError, Result}, - fs::path::Path, + error::{FsError, KernelError, Result}, + fs::{ + attr::{AccessMode, FileAttr}, + path::Path, + }, memory::address::TUA, + proc::caps::CapabilitiesFlags, }; +use ringbuf::Arc; use crate::{ clock::{realtime::date, timespec::TimeSpec}, current_task, fs::syscalls::at::{AtFlags, resolve_at_start_node, resolve_path_flags}, memory::uaccess::{copy_from_user, cstr::UserCStr}, - process::fd_table::Fd, + process::{Task, fd_table::Fd}, }; +const UTIME_NOW: u64 = (1 << 30) - 1; +const UTIME_OMIT: u64 = (1 << 30) - 2; + pub async fn sys_utimensat( dirfd: Fd, path: TUA, @@ -37,19 +45,43 @@ pub async fn sys_utimensat( let start_node = resolve_at_start_node(dirfd, path).await?; let flags = AtFlags::from_bits_retain(flags); - resolve_path_flags(dirfd, path, start_node, task, flags).await? + resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await? }; let mut attr = node.getattr().await?; if times.is_null() { + test_creds(task, &attr)?; attr.atime = date(); attr.mtime = date(); attr.ctime = date(); } else { let times = copy_from_user(times).await?; - attr.atime = times[0].into(); - attr.mtime = times[1].into(); + if times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW { + test_creds(task, &attr)?; + } else if times[0].tv_nsec != UTIME_OMIT && times[1].tv_nsec != UTIME_OMIT { + let creds = task.creds.lock_save_irq(); + if creds.euid() != attr.uid + && !creds.caps().is_capable(CapabilitiesFlags::CAP_FOWNER) + && !creds.caps().is_capable(CapabilitiesFlags::CAP_DAC_OVERRIDE) + { + return Err(FsError::PermissionDenied.into()); + } + } + + let atime = match times[0].tv_nsec { + UTIME_NOW => date(), + UTIME_OMIT => attr.atime, + _ => times[0].into(), + }; + let mtime = match times[1].tv_nsec { + UTIME_NOW => date(), + UTIME_OMIT => attr.mtime, + _ => times[1].into(), + }; + + attr.atime = atime; + attr.mtime = mtime; attr.ctime = date(); } @@ -57,3 +89,18 @@ pub async fn sys_utimensat( Ok(0) } + +fn test_creds(task: Arc, attr: &FileAttr) -> Result<()> { + let creds = task.creds.lock_save_irq(); + if attr + .check_access(creds.uid(), creds.gid(), creds.caps(), AccessMode::W_OK) + .is_err() + && creds.euid() != attr.uid + && !creds.caps().is_capable(CapabilitiesFlags::CAP_FOWNER) + && !creds.caps().is_capable(CapabilitiesFlags::CAP_DAC_OVERRIDE) + { + Err(FsError::PermissionDenied.into()) + } else { + Ok(()) + } +} diff --git a/src/fs/syscalls/chdir.rs b/src/fs/syscalls/chdir.rs index 755fb18..6707f12 100644 --- a/src/fs/syscalls/chdir.rs +++ b/src/fs/syscalls/chdir.rs @@ -10,6 +10,7 @@ use libkernel::{ error::{KernelError, Result}, fs::path::Path, memory::address::{TUA, UA}, + proc::caps::CapabilitiesFlags, }; pub async fn sys_getcwd(buf: UA, len: usize) -> Result { @@ -43,10 +44,15 @@ pub async fn sys_chdir(path: TUA) -> Result { } pub async fn sys_chroot(path: TUA) -> Result { + let task = current_task(); + task.creds + .lock_save_irq() + .caps() + .check_capable(CapabilitiesFlags::CAP_SYS_CHROOT)?; + let mut buf = [0; 1024]; let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); - let task = current_task(); let current_path = task.root.lock_save_irq().0.clone(); let new_path = task.root.lock_save_irq().1.join(path); diff --git a/src/fs/syscalls/chmod.rs b/src/fs/syscalls/chmod.rs index ce33486..8ebb2bc 100644 --- a/src/fs/syscalls/chmod.rs +++ b/src/fs/syscalls/chmod.rs @@ -1,3 +1,4 @@ +use super::at::chmod::can_chmod; use libkernel::{ error::{KernelError, Result}, fs::attr::FilePermissions, @@ -17,6 +18,10 @@ pub async fn sys_fchmod(fd: Fd, mode: u16) -> Result { let inode = file.inode().ok_or(KernelError::BadFd)?; let mut attr = inode.getattr().await?; + if !can_chmod(task, attr.uid) { + return Err(KernelError::NotPermitted); + } + attr.mode = mode; inode.setattr(attr).await?; diff --git a/src/fs/syscalls/chown.rs b/src/fs/syscalls/chown.rs index 8a420d1..51d22ee 100644 --- a/src/fs/syscalls/chown.rs +++ b/src/fs/syscalls/chown.rs @@ -1,11 +1,14 @@ use libkernel::{ error::{KernelError, Result}, - proc::ids::{Gid, Uid}, + proc::{ + caps::CapabilitiesFlags, + ids::{Gid, Uid}, + }, }; use crate::{process::fd_table::Fd, sched::current_task}; -pub async fn sys_fchown(fd: Fd, owner: Uid, group: Gid) -> Result { +pub async fn sys_fchown(fd: Fd, owner: i32, group: i32) -> Result { let task = current_task(); let file = task .fd_table @@ -16,8 +19,21 @@ pub async fn sys_fchown(fd: Fd, owner: Uid, group: Gid) -> Result { let inode = file.inode().ok_or(KernelError::BadFd)?; let mut attr = inode.getattr().await?; - attr.uid = owner; - attr.gid = group; + { + let creds = task.creds.lock_save_irq(); + if owner != -1 { + creds.caps().check_capable(CapabilitiesFlags::CAP_CHOWN)?; + attr.uid = Uid::new(owner as _); + } + if group != -1 { + let gid = Gid::new(group as _); + // doesnt seem like theres real groups so this is as good as it gets + if creds.uid() != attr.uid || creds.gid() != gid { + creds.caps().check_capable(CapabilitiesFlags::CAP_CHOWN)?; + } + attr.gid = gid; + } + } inode.setattr(attr).await?; Ok(0) diff --git a/src/kernel/power.rs b/src/kernel/power.rs index e0cd0f7..569ab60 100644 --- a/src/kernel/power.rs +++ b/src/kernel/power.rs @@ -1,7 +1,16 @@ -use crate::{ArchImpl, arch::Arch}; -use libkernel::error::{KernelError, Result}; +use crate::{ArchImpl, arch::Arch, sched::current_task}; +use libkernel::{ + error::{KernelError, Result}, + proc::caps::CapabilitiesFlags, +}; pub async fn sys_reboot(magic: u32, magic2: u32, op: u32, _arg: usize) -> Result { + current_task() + .creds + .lock_save_irq() + .caps() + .check_capable(CapabilitiesFlags::CAP_SYS_BOOT)?; + const LINUX_REBOOT_MAGIC1: u32 = 0xfee1_dead; const LINUX_REBOOT_MAGIC2: u32 = 672274793; const LINUX_REBOOT_MAGIC2A: u32 = 852072454; diff --git a/src/process/caps.rs b/src/process/caps.rs new file mode 100644 index 0000000..893656a --- /dev/null +++ b/src/process/caps.rs @@ -0,0 +1,140 @@ +use crate::{ + current_task, + memory::uaccess::{ + UserCopyable, copy_from_user, copy_obj_array_from_user, copy_objs_to_user, copy_to_user, + }, + process::TASK_LIST, +}; +use libkernel::{ + error::{KernelError, Result}, + memory::address::TUA, + proc::caps::{Capabilities, CapabilitiesFlags}, +}; + +const LINUX_CAPABILITY_VERSION_1: u32 = 0x19980330; +const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct CapUserHeader { + version: u32, + pid: i32, +} + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct CapUserData { + effective: u32, + permitted: u32, + inheritable: u32, +} + +impl CapUserData { + fn from_caps(caps: Capabilities) -> [Self; 2] { + [ + Self { + effective: caps.effective().bits() as u32, + permitted: caps.permitted().bits() as u32, + inheritable: caps.inheritable().bits() as u32, + }, + Self { + effective: (caps.effective().bits() >> 32) as u32, + permitted: (caps.permitted().bits() >> 32) as u32, + inheritable: (caps.inheritable().bits() >> 32) as u32, + }, + ] + } +} + +unsafe impl UserCopyable for CapUserHeader {} +unsafe impl UserCopyable for CapUserData {} + +pub async fn sys_capget(hdrp: TUA, datap: TUA) -> Result { + let mut header = copy_from_user(hdrp).await?; + + let task = if header.pid == 0 { + current_task() + } else { + TASK_LIST + .lock_save_irq() + .iter() + .find(|task| task.0.tgid.value() == header.pid as _) + .and_then(|task| task.1.upgrade()) + .ok_or(KernelError::NoProcess)? + }; + match header.version { + LINUX_CAPABILITY_VERSION_1 => { + let caps = task.creds.lock_save_irq().caps(); + let caps = CapUserData::from_caps(caps); + copy_to_user(datap, caps[0]).await?; + } + LINUX_CAPABILITY_VERSION_3 => { + let caps = task.creds.lock_save_irq().caps(); + let caps = CapUserData::from_caps(caps); + copy_objs_to_user(&caps, datap).await?; + } + _ => { + header.version = LINUX_CAPABILITY_VERSION_3; + copy_to_user(hdrp, header).await?; + return Err(KernelError::InvalidValue); + } + } + Ok(0) +} + +pub async fn sys_capset(hdrp: TUA, datap: TUA) -> Result { + let mut header = copy_from_user(hdrp).await?; + + let caller_caps = current_task().creds.lock_save_irq().caps(); + let task = if header.pid == 0 { + current_task() + } else { + caller_caps.check_capable(CapabilitiesFlags::CAP_SETPCAP)?; + TASK_LIST + .lock_save_irq() + .iter() + .find(|task| task.0.tgid.value() == header.pid as _) + .and_then(|task| task.1.upgrade()) + .ok_or(KernelError::NoProcess)? + }; + + let (effective, permitted, inheritable) = match header.version { + LINUX_CAPABILITY_VERSION_1 => { + let datap = copy_from_user(datap).await?; + let effective = CapabilitiesFlags::from_bits_retain(datap.effective as _); + let permitted = CapabilitiesFlags::from_bits_retain(datap.permitted as _); + let inheritable = CapabilitiesFlags::from_bits_retain(datap.inheritable as _); + + (effective, permitted, inheritable) + } + LINUX_CAPABILITY_VERSION_3 => { + let datap: [CapUserData; 2] = copy_obj_array_from_user(datap, 2) + .await? + .try_into() + .map_err(|_| KernelError::InvalidValue)?; + let effective = CapabilitiesFlags::from_bits_retain( + ((datap[1].effective as u64) << 32) | datap[0].effective as u64, + ); + let permitted = CapabilitiesFlags::from_bits_retain( + ((datap[1].permitted as u64) << 32) | datap[0].permitted as u64, + ); + let inheritable = CapabilitiesFlags::from_bits_retain( + ((datap[1].inheritable as u64) << 32) | datap[0].inheritable as u64, + ); + + (effective, permitted, inheritable) + } + _ => { + header.version = LINUX_CAPABILITY_VERSION_3; + copy_to_user(hdrp, header).await?; + return Err(KernelError::InvalidValue); + } + }; + + let mut creds = task.creds.lock_save_irq(); + creds + .caps + .set_public(caller_caps, effective, permitted, inheritable)?; + + Ok(0) +} diff --git a/src/process/creds.rs b/src/process/creds.rs index 6b90a1d..4680256 100644 --- a/src/process/creds.rs +++ b/src/process/creds.rs @@ -7,7 +7,10 @@ use crate::{ use libkernel::{ error::Result, memory::address::TUA, - proc::ids::{Gid, Uid}, + proc::{ + caps::Capabilities, + ids::{Gid, Uid}, + }, }; unsafe impl UserCopyable for Uid {} @@ -21,6 +24,7 @@ pub struct Credentials { gid: Gid, egid: Gid, sgid: Gid, + pub(super) caps: Capabilities, } impl Credentials { @@ -32,6 +36,7 @@ impl Credentials { gid: Gid::new_root_group(), egid: Gid::new_root_group(), sgid: Gid::new_root_group(), + caps: Capabilities::new_root(), } } @@ -58,6 +63,10 @@ impl Credentials { pub fn sgid(&self) -> Gid { self.sgid } + + pub fn caps(&self) -> Capabilities { + self.caps + } } pub fn sys_getuid() -> core::result::Result { diff --git a/src/process/mod.rs b/src/process/mod.rs index 81b5cd1..0eaee49 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -29,6 +29,7 @@ use thread_group::{ signal::{SigId, SigSet, SignalState}, }; +pub mod caps; pub mod clone; pub mod creds; pub mod ctx;