Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kernel object improvements #162

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions lightswitch-object/src/buildid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,17 @@ use std::fmt::Display;
use std::fmt::Formatter;
use std::str;

use anyhow::Result;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bull_bull_derivative

(this is actually fine I just needed an excuse to use the gif)

use data_encoding::HEXLOWER;
use ring::digest::Digest;

/// Compact identifier for executable files.
///
/// The identifier is derived from the first 8 bytes of the build id. Using
/// this smaller type for object files consumes less memory and makes
/// comparisons and other operations cheaper.
pub type ExecutableId = u64;

#[derive(Hash, Eq, PartialEq, Clone)]
pub enum BuildIdFlavour {
Gnu,
Expand Down Expand Up @@ -44,7 +52,12 @@ impl BuildId {
}
}

pub fn build_id_formatted(&self) -> String {
/// Returns an identifier for the executable using the first 8 bytes of the build id.
pub fn id(&self) -> Result<ExecutableId> {
Ok(u64::from_ne_bytes(self.data[..8].try_into()?))
}

pub fn short(&self) -> String {
match self.flavour {
BuildIdFlavour::Gnu => {
self.data
Expand All @@ -66,7 +79,7 @@ impl BuildId {
}

pub fn formatted(&self) -> String {
format!("{}-{}", self.flavour, self.build_id_formatted())
format!("{}-{}", self.flavour, self.short())
}
}

Expand Down
134 changes: 134 additions & 0 deletions lightswitch-object/src/kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use anyhow::anyhow;
use object::elf::{FileHeader32, FileHeader64, ELF_NOTE_GNU, NT_GNU_BUILD_ID, PT_NOTE};
use object::read::elf::FileHeader;
use object::read::elf::NoteIterator;
use object::read::elf::ProgramHeader;
use object::Endianness;
use object::FileKind;
use object::ReadCache;
use std::fs::File;

use crate::BuildId;

/// Path of the file that `kaslr_offset` opens and parses as an ELF image.
const KCORE_PATH: &str = "/proc/kcore";
/// Name of the ELF note whose descriptor is scanned for `KEY=value` lines.
const VMCORE_INFO_NAME: &[u8] = b"VMCOREINFO";
/// Key of the entry whose value is parsed as the hex-encoded KASLR offset.
const KERNEL_OFFSET: &[u8] = b"KERNELOFFSET";

/// Extracts the GNU build id from the raw contents of an ELF notes section.
///
/// The notes are decoded as little-endian with 4-byte alignment. Notes that
/// fail to decode are skipped. The first note whose name is `ELF_NOTE_GNU`
/// and whose type is `NT_GNU_BUILD_ID` supplies the build id bytes.
///
/// # Errors
///
/// Fails when the note iterator cannot be created over `data`, or when no
/// matching note is present.
pub fn parse_gnu_build_id_from_notes(data: &[u8]) -> Result<BuildId, anyhow::Error> {
    let endian = Endianness::Little;
    let notes: NoteIterator<'_, FileHeader32<Endianness>> = NoteIterator::new(endian, 4, data)?;

    notes
        .filter_map(|note| note.ok())
        .find(|note| note.name() == ELF_NOTE_GNU && note.n_type(endian) == NT_GNU_BUILD_ID)
        .map(|note| BuildId::gnu_from_bytes(note.desc()))
        .ok_or_else(|| anyhow!("no GNU build id note found"))
}

/// Iterates over the `KEY=value` entries of a vmcoreinfo note descriptor.
///
/// The input is split on newlines and each line is split on `=`. A line
/// yields `(key, value)` from its first two `=`-separated fields; anything
/// after a second `=` is ignored. Lines without any `=` are skipped.
fn _parse_vm_core_info_line(data: &[u8]) -> impl Iterator<Item = (&[u8], &[u8])> {
    data.split(|&byte| byte == b'\n').filter_map(|line| {
        let mut fields = line.split(|&byte| byte == b'=');
        let key = fields.next()?;
        let value = fields.next()?;
        Some((key, value))
    })
}

/// Extract the KASLR offset from the running vmlinux.
///
/// Opens `KCORE_PATH`, walks the `PT_NOTE` program headers of the 64-bit ELF
/// image and looks for a note named `VMCORE_INFO_NAME`. The note's descriptor
/// is scanned for the `KERNEL_OFFSET` entry, whose value is hex-encoded ASCII.
///
/// # Errors
///
/// Returns an error, and never panics, when:
/// - the file cannot be opened or is not a parseable object file,
/// - the file is not a 64-bit ELF image,
/// - a program header's data cannot be read,
/// - the offset value is not valid UTF-8 / hexadecimal,
/// - no note carries a `KERNEL_OFFSET` entry.
pub fn kaslr_offset() -> anyhow::Result<u64> {
    let data = ReadCache::new(File::open(KCORE_PATH)?);

    match FileKind::parse(&data)? {
        FileKind::Elf64 => {}
        other => {
            return Err(anyhow!(
                "only 64 bit ELF kcore is supported, found {:?}",
                other
            ));
        }
    }

    let elf_header: &FileHeader64<Endianness> = FileHeader64::<Endianness>::parse(&data)?;
    let endian = elf_header.endian()?;

    for segment in elf_header.program_headers(endian, &data)? {
        if segment.p_type(endian) != PT_NOTE {
            continue;
        }

        // Decode the notes with the endianness reported by the ELF header
        // rather than assuming little-endian.
        let notes: NoteIterator<'_, FileHeader64<Endianness>> = NoteIterator::new(
            endian,
            segment.p_align(endian),
            segment
                .data(endian, &data)
                .map_err(|_| anyhow!("invalid header data"))?,
        )?;

        for note in notes {
            let Ok(note) = note else {
                continue;
            };

            if note.name() != VMCORE_INFO_NAME {
                continue;
            }

            // A vmcoreinfo note without the entry is not fatal: keep looking
            // and fall through to the final error if nothing is found.
            let Some(raw_offset) = _parse_vm_core_info_line(note.desc())
                .find(|(key, _val)| *key == KERNEL_OFFSET)
                .map(|(_key, val)| val)
            else {
                continue;
            };

            // This entry is stored in hex-encoded ascii. It could be converted in one go
            // but this is not performance sensitive as it runs once. It's ok to take 2 hops
            // to convert it rather than hand rolling it or bringing another dependency.
            let text = std::str::from_utf8(raw_offset)?.trim();
            return Ok(u64::from_str_radix(text, 16)?);
        }
    }

    Err(anyhow!("could not find the kASLR offset"))
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Read;

    use crate::kernel::*;
    use crate::*;

    /// The checked-in Fedora kernel notes blob must yield its known GNU build id.
    #[test]
    fn test_parse_gnu_build_id_from_notes() {
        let mut notes = Vec::new();
        File::open("src/testdata/fedora-kernel-notes")
            .unwrap()
            .read_to_end(&mut notes)
            .unwrap();

        let expected = BuildId {
            flavour: buildid::BuildIdFlavour::Gnu,
            data: vec![
                184, 215, 12, 245, 25, 250, 197, 165, 204, 205, 218, 26, 97, 195, 137, 149, 189,
                155, 48, 89,
            ],
        };

        assert_eq!(parse_gnu_build_id_from_notes(&notes).unwrap(), expected);
    }

    /// Reads the KASLR offset of the running kernel and prints it in hex.
    #[test]
    fn test_aslr_offset() {
        let offset = kaslr_offset().unwrap();
        println!("{:x}", offset);
        assert!(kaslr_offset().is_ok());
    }
}
4 changes: 3 additions & 1 deletion lightswitch-object/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
mod buildid;
pub mod kernel;
mod object;

pub use object::code_hash;
pub use object::ElfLoad;
pub use object::ObjectFile;
pub use object::{ElfLoad, ExecutableId};

pub use buildid::BuildId;
pub use buildid::ExecutableId;
11 changes: 2 additions & 9 deletions lightswitch-object/src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use anyhow::{anyhow, Result};
use memmap2::Mmap;
use ring::digest::{Context, Digest, SHA256};

use crate::BuildId;
use object::elf::{FileHeader32, FileHeader64, PT_LOAD};
use object::read::elf::FileHeader;
use object::read::elf::ProgramHeader;
Expand All @@ -16,13 +15,7 @@ use object::Object;
use object::ObjectKind;
use object::ObjectSection;

/// Compact identifier for executable files.
///
/// Compact identifier for executable files derived from the first 8 bytes
/// of the hash of the code stored in the .text ELF segment. By using this
/// smaller type for object files less memory is used and also comparison,
/// and other operations are cheaper.
pub type ExecutableId = u64;
use crate::{BuildId, ExecutableId};

/// Elf load segments used during address normalization to find the segment
/// for what an code address falls into.
Expand Down Expand Up @@ -65,7 +58,7 @@ impl ObjectFile {

/// Returns an identifier for the executable using the first 8 bytes of the build id.
pub fn id(&self) -> Result<ExecutableId> {
Ok(u64::from_ne_bytes(self.build_id.data[..8].try_into()?))
self.build_id.id()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

noice

}

/// Returns the executable build ID.
Expand Down
Binary file added lightswitch-object/src/testdata/fedora-kernel-notes
Binary file not shown.
148 changes: 148 additions & 0 deletions src/kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use std::fs::File;
use std::io::BufReader;
use std::io::Read;

use anyhow::anyhow;
use procfs;

use crate::ksym::KsymIter;
use lightswitch_object::kernel::parse_gnu_build_id_from_notes;
use lightswitch_object::BuildId;

/// Process id constant associated with the kernel.
// NOTE(review): presumably the pseudo-PID under which kernel code ranges are
// tracked; its users are not visible in this file — confirm against callers.
pub const KERNEL_PID: i32 = 0;

/// A named range of kernel code: either a loaded module or the kernel image.
#[derive(Debug)]
pub struct KernelCodeRange {
/// Module name, or `"[vmlinux]"` for the kernel image itself.
pub name: String,
/// GNU build id parsed from the notes of the module or kernel image.
pub build_id: BuildId,
/// Virtual address at which the code range starts.
pub start: u64,
/// End of the range: `start` plus the module size for modules, or the
/// address of `_etext` for the kernel image.
pub end: u64,
}

/// A pair of virtual addresses delimiting a region of code.
///
/// `kernel_addresses` fills `start` with the address of `_stext` and `end`
/// with the address of `_etext`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AddressRange {
    /// First address of the range.
    pub start: u64,
    /// Address marking the end of the range.
    pub end: u64,
}

/// Lists all kernel code ranges. This includes the kernel image and the loaded
/// modules.
pub fn get_all_kernel_modules() -> Result<Vec<KernelCodeRange>, anyhow::Error> {
let mut code_sections = _list_modules()?;
let address_range = kernel_addresses()?;
code_sections.push(KernelCodeRange {
name: "[vmlinux]".into(),
build_id: kernel_build_id()?,
start: address_range.start,
end: address_range.end,
});
Ok(code_sections)
}

/// Builds a code range for every kernel module reported by `procfs::modules`.
///
/// Only modules in the `Live` state are considered. A module whose start
/// address or build id cannot be read is left out rather than failing the
/// whole listing.
fn _list_modules() -> Result<Vec<KernelCodeRange>, anyhow::Error> {
    let ranges = procfs::modules()?
        .into_values()
        .filter(|module| module.state == "Live")
        .filter_map(|module| {
            let start = _module_start_address(&module.name).ok()?;
            let build_id = _get_module_build_id(&module.name).ok()?;
            let end = start + module.size as u64;

            Some(KernelCodeRange {
                name: module.name,
                build_id,
                start,
                end,
            })
        })
        .collect();

    Ok(ranges)
}

/// Read and parse the build id of a given kernel module.
fn _get_module_build_id(module_name: &str) -> Result<BuildId, anyhow::Error> {
let mut file = BufReader::new(File::open(format!(
"/sys/module/{}/notes/.note.gnu.build-id",
module_name
))?);
let mut data = Vec::new();
file.read_to_end(&mut data)?;

parse_gnu_build_id_from_notes(&data)
Comment on lines +75 to +78
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at it, would it make sense to pass the reader directly to the parse_gnu_build_id_from_notes()? Or does the iterator not work on readers?

}

/// Finds the virtual address at which a given kernel module is loaded.
fn _module_start_address(module_name: &str) -> Result<u64, anyhow::Error> {
let mut file = File::open(format!("/sys/module/{}/sections/.text", module_name))?;
let mut buffer = [0; 8];
file.read_exact(&mut buffer)?;

Ok(u64::from_ne_bytes(buffer))
}

/// Read and parse the build id of the running kernel image.
/// This can also be done using `perf` with `perf buildid-list --kernel`.
pub fn kernel_build_id() -> Result<BuildId, anyhow::Error> {
let mut file = BufReader::new(File::open("/sys/kernel/notes")?);
let mut data = Vec::new();
file.read_to_end(&mut data)?;

parse_gnu_build_id_from_notes(&data)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

}

/// Determines the virtual address range of the loaded kernel image.
///
/// Scans the kallsyms symbols for `_stext` (range start) and `_etext` (range
/// end) and stops scanning as soon as both have been seen.
///
/// # Errors
///
/// Fails when either symbol is missing from the symbols that were iterated.
pub fn kernel_addresses() -> Result<AddressRange, anyhow::Error> {
    let mut text_start = None;
    let mut text_end = None;

    for symbol in KsymIter::from_kallsyms() {
        if symbol.symbol_name == "_stext" {
            text_start = Some(symbol.start_addr);
        } else if symbol.symbol_name == "_etext" {
            text_end = Some(symbol.start_addr);
        }

        // Nothing further can change the answer once both ends are known.
        if text_start.is_some() && text_end.is_some() {
            break;
        }
    }

    text_start
        .zip(text_end)
        .map(|(start, end)| AddressRange { start, end })
        .ok_or_else(|| anyhow!("could not find start and end kernel code addresses"))
}

#[cfg(test)]
mod tests {
    use crate::kernel::*;

    /// Listing the kernel code ranges must succeed and include the kernel image.
    #[test]
    fn kernel_code_ranges() {
        let result = get_all_kernel_modules();
        assert!(result.is_ok());

        let ranges = result.unwrap();
        let vmlinux = ranges.iter().find(|range| range.name == "[vmlinux]");
        assert_eq!(vmlinux.iter().len(), 1);
    }
}
9 changes: 6 additions & 3 deletions src/ksym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ impl<R: Read> Iterator for KsymIter<R> {
if let (Some(addr_str), Some(symbol_type), Some(symbol_name)) =
(iter.next(), iter.next(), iter.next())
{
// This list is probably not complete
// https://github.com/torvalds/linux/blob/3d7cb6b0/tools/lib/symbol/kallsyms.c#LL17C1-L18C1
if symbol_type == "T" || symbol_type == "W" {
// See `man nm` for the meaning of the symbol types.
if symbol_type == "T"
|| symbol_type == "t"
|| symbol_type == "W"
|| symbol_type == "D"
{
if let Ok(start_addr) = u64::from_str_radix(addr_str, 16) {
return Some(Ksym {
start_addr,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod bpf;
pub mod collector;
pub mod debug_info;
pub mod kernel;
pub mod ksym;
pub mod perf_events;
pub mod process;
Expand Down
Loading