hermit/arch/x86_64/kernel/
apic.rs

1use alloc::alloc::alloc;
2use alloc::vec::Vec;
3use core::alloc::Layout;
4#[cfg(feature = "smp")]
5use core::arch::x86_64::_mm_mfence;
6#[cfg(feature = "acpi")]
7use core::fmt;
8use core::hint::spin_loop;
9use core::sync::atomic::Ordering;
10use core::{cmp, mem, ptr};
11
12use align_address::Align;
13#[cfg(feature = "smp")]
14use arch::x86_64::kernel::core_local::*;
15use arch::x86_64::kernel::{interrupts, processor};
16use hermit_sync::{OnceCell, SpinMutex, without_interrupts};
17use memory_addresses::{AddrRange, PhysAddr, VirtAddr};
18#[cfg(feature = "smp")]
19use x86_64::registers::control::Cr3;
20use x86_64::registers::model_specific::Msr;
21
22use super::interrupts::IDT;
23use crate::arch::x86_64::kernel::CURRENT_STACK_ADDRESS;
24#[cfg(feature = "acpi")]
25use crate::arch::x86_64::kernel::acpi;
26use crate::arch::x86_64::mm::paging::{
27	BasePageSize, PageSize, PageTableEntryFlags, PageTableEntryFlagsExt,
28};
29use crate::arch::x86_64::mm::{paging, virtualmem};
30use crate::arch::x86_64::swapgs;
31use crate::config::*;
32use crate::scheduler::CoreId;
33use crate::{arch, env, scheduler};
34
// Model-specific registers used by this module.
//
// The `IA32_X2APIC_*` values are x2APIC MSR numbers. In xAPIC mode the same
// numbers are converted into memory-mapped register offsets by
// `translate_x2apic_msr_to_xapic_address` (low byte of the MSR number, shifted left by 4).

/// APIC Location and Status (R/W). See Table 35-2 and Section 10.4.4, "Local APIC Status and Location".
const IA32_APIC_BASE: Msr = Msr::new(0x1b);

/// TSC target of the Local APIC's TSC-Deadline Mode (R/W). See Table 35-2.
const IA32_TSC_DEADLINE: Msr = Msr::new(0x6e0);

/// x2APIC Task Priority register (R/W)
const IA32_X2APIC_TPR: u32 = 0x808;

/// x2APIC End of Interrupt register. Available if CPUID.01H:ECX.\[bit 21\] = 1.
const IA32_X2APIC_EOI: u32 = 0x80b;

/// x2APIC Spurious Interrupt Vector register (R/W)
const IA32_X2APIC_SIVR: u32 = 0x80f;

/// x2APIC Error Status register. Available if CPUID.01H:ECX.\[bit 21\] = 1.
const IA32_X2APIC_ESR: u32 = 0x828;

/// x2APIC Interrupt Command register (R/W)
const IA32_X2APIC_ICR: u32 = 0x830;

/// x2APIC LVT Timer Interrupt register (R/W)
const IA32_X2APIC_LVT_TIMER: u32 = 0x832;

/// x2APIC LVT Thermal Sensor Interrupt register (R/W)
const IA32_X2APIC_LVT_THERMAL: u32 = 0x833;

/// x2APIC LVT Performance Monitor register (R/W)
const IA32_X2APIC_LVT_PMI: u32 = 0x834;

/// x2APIC LVT LINT0 register. Available if CPUID.01H:ECX.\[bit 21\] = 1.
const IA32_X2APIC_LVT_LINT0: u32 = 0x835;

/// x2APIC LVT LINT1 register. Available if CPUID.01H:ECX.\[bit 21\] = 1.
const IA32_X2APIC_LVT_LINT1: u32 = 0x836;

/// x2APIC LVT Error register. Available if CPUID.01H:ECX.\[bit 21\] = 1.
const IA32_X2APIC_LVT_ERROR: u32 = 0x837;

/// x2APIC Initial Count register (R/W)
const IA32_X2APIC_INIT_COUNT: u32 = 0x838;

/// x2APIC Current Count register (R/O)
const IA32_X2APIC_CUR_COUNT: u32 = 0x839;

/// x2APIC Divide Configuration register (R/W)
const IA32_X2APIC_DIV_CONF: u32 = 0x83e;
/// Signature of the MP Floating Pointer Structure (the bytes `_MP_` when read as a little-endian `u32`).
const MP_FLT_SIGNATURE: u32 = 0x5f50_4d5f;
/// Signature of the MP Configuration Table (the bytes `PCMP` when read as a little-endian `u32`).
const MP_CONFIG_SIGNATURE: u32 = 0x504d_4350;

/// Byte offset of the second Interrupt Command register (ICR2) from the mapped xAPIC base address.
const APIC_ICR2: usize = 0x0310;

/// Value written to the Divide Configuration register while calibrating the timer.
const APIC_DIV_CONF_DIVIDE_BY_8: u64 = 0b0010;
/// Value written to the End of Interrupt register to acknowledge an interrupt.
const APIC_EOI_ACK: u64 = 0;
/// ICR delivery mode: fixed (deliver the given vector).
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_FIXED: u64 = 0x000;
/// ICR delivery mode: INIT.
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_INIT: u64 = 0x500;
/// ICR delivery mode: STARTUP.
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_STARTUP: u64 = 0x600;
/// ICR delivery status bit; polled in xAPIC mode before a new command is written.
const APIC_ICR_DELIVERY_STATUS_PENDING: u32 = 1 << 12;
/// ICR trigger mode bit: level-triggered.
#[cfg(feature = "smp")]
const APIC_ICR_LEVEL_TRIGGERED: u64 = 1 << 15;
/// ICR level bit: assert.
#[cfg(feature = "smp")]
const APIC_ICR_LEVEL_ASSERT: u64 = 1 << 14;
/// Mask bit of a Local Vector Table entry; a set bit disables the interrupt source.
const APIC_LVT_MASK: u64 = 1 << 16;
/// LVT Timer mode bit selecting TSC-Deadline mode.
const APIC_LVT_TIMER_TSC_DEADLINE: u64 = 1 << 18;
/// Enable bit of the Spurious Interrupt Vector register.
const APIC_SIVR_ENABLED: u64 = 1 << 8;
104
/// IOAPIC register index: ID
#[allow(dead_code)]
const IOAPIC_REG_ID: u32 = 0x0000;
/// IOAPIC register index: version (also carries the maximum redirection entry in bits 16..24)
const IOAPIC_REG_VER: u32 = 0x0001;
/// IOAPIC register index: base of the redirection table (two 32-bit registers per entry)
const IOAPIC_REG_TABLE: u32 = 0x0010;

/// Interrupt vector of the IPI that asks other cores to flush their TLB.
#[cfg(feature = "smp")]
const TLB_FLUSH_INTERRUPT_NUMBER: u8 = 112;
/// Interrupt vector of the IPI that wakes up a halted core.
#[cfg(feature = "smp")]
const WAKEUP_INTERRUPT_NUMBER: u8 = 121;
/// Interrupt vector programmed into the LVT Timer register.
pub const TIMER_INTERRUPT_NUMBER: u8 = 123;
/// Interrupt vector programmed into the LVT Error register.
const ERROR_INTERRUPT_NUMBER: u8 = 126;
/// Interrupt vector programmed into the Spurious Interrupt Vector register.
const SPURIOUS_INTERRUPT_NUMBER: u8 = 127;
120
/// Physical and virtual memory address for our SMP boot code.
///
/// While our boot processor is already in x86-64 mode, application processors boot up in 16-bit real mode
/// and need an address in the CS:IP addressing scheme to jump to.
/// The CS:IP addressing scheme is limited to 2^20 bytes (= 1 MiB).
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_ADDRESS: VirtAddr = VirtAddr::new(0x8000);

/// Offset within the boot code page at which the entry point address is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_ENTRY: u64 = 0x08;
/// Offset within the boot code page at which the ID of the core to boot is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_CPU_ID: u64 = SMP_BOOT_CODE_OFFSET_ENTRY + 0x08;
/// Offset within the boot code page at which the (32-bit) PML4 address is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_PML4: u64 = SMP_BOOT_CODE_OFFSET_CPU_ID + 0x04;

/// Bit in `IA32_APIC_BASE` that switches the Local APIC into x2APIC mode.
const X2APIC_ENABLE: u64 = 1 << 10;
137
/// Virtual address of the memory-mapped Local APIC registers.
/// Only set by `init` when the CPU does not support x2APIC.
static LOCAL_APIC_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();
/// Virtual address of the memory-mapped IOAPIC registers, set by `init_ioapic_address`.
static IOAPIC_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();

/// Stores the Local APIC IDs of all CPUs. The index equals the Core ID.
/// Both numbers often match, but don't need to (e.g. when a core has been disabled).
static CPU_LOCAL_APIC_IDS: SpinMutex<Vec<u8>> = SpinMutex::new(Vec::new());

/// After calibration, initialize the APIC Timer with this counter value to let it fire an interrupt
/// after 1 microsecond. Only set when the CPU lacks TSC-Deadline support (see `init`).
static CALIBRATED_COUNTER_VALUE: OnceCell<u64> = OnceCell::new();
148
/// MP Floating Pointer Structure
///
/// Located by `search_mp_floating`, which scans physical memory for `MP_FLT_SIGNATURE`.
#[repr(C, packed)]
struct ApicMP {
	/// Must equal `MP_FLT_SIGNATURE`.
	signature: u32,
	/// Physical address of the MP Configuration Table (mapped by `detect_from_mp`).
	mp_config: u32,
	length: u8,
	/// Minor revision of the Multiprocessing Specification (logged as "1.x").
	version: u8,
	checksum: u8,
	/// Feature bytes; bit 7 of `features[1]` indicates PIC mode, otherwise Virtual-Wire mode.
	features: [u8; 5],
}
159
/// MP Configuration Table
///
/// Header of the table referenced by `ApicMP::mp_config`; the table entries
/// follow directly after this header.
#[repr(C, packed)]
struct ApicConfigTable {
	/// Must equal `MP_CONFIG_SIGNATURE`.
	signature: u32,
	length: u16,
	revision: u8,
	checksum: u8,
	oem_id: [u8; 8],
	product_id: [u8; 12],
	oem_table: u32,
	oem_table_size: u16,
	/// Number of entries following this header.
	entry_count: u16,
	/// Physical address of the Local APIC.
	lapic: u32,
	extended_table_length: u16,
	extended_table_checksum: u8,
	reserved: u8,
}
177
/// APIC Processor Entry
///
/// MP Configuration Table entry with entry type 0.
#[repr(C, packed)]
struct ApicProcessorEntry {
	/// Entry type (0 for processor entries).
	ty: u8,
	/// Local APIC ID of the processor.
	id: u8,
	version: u8,
	/// Bit 0 set means the processor is usable; only such processors are registered.
	cpu_flags: u8,
	cpu_signature: u32,
	cpu_feature: u32,
	reserved: [u32; 2],
}
189
/// IO APIC Entry
///
/// MP Configuration Table entry with entry type 2.
#[repr(C, packed)]
struct ApicIoEntry {
	/// Entry type (2 for IO APIC entries).
	ty: u8,
	id: u8,
	version: u8,
	enabled: u8,
	/// Physical address of the IOAPIC registers.
	addr: u32,
}
199
/// MADT-specific header found at the table start address returned by the ACPI module.
/// The variable-length MADT records follow directly after it.
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct AcpiMadtHeader {
	/// Physical address of the Local APIC.
	local_apic_address: u32,
	flags: u32,
}
206
/// Header shared by all MADT records.
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct AcpiMadtRecordHeader {
	/// Record type (0 = Processor Local APIC, 1 = I/O APIC; others are ignored by this module).
	entry_type: u8,
	/// Length of the whole record in bytes, including this header.
	length: u8,
}
213
/// Payload of a MADT record with entry type 0 (Processor Local APIC).
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct ProcessorLocalApicRecord {
	acpi_processor_id: u8,
	/// Local APIC ID of the processor.
	apic_id: u8,
	/// Processor flags; see `CPU_FLAG_ENABLED`.
	flags: u32,
}
221
/// Renders the record as `{ acpi_processor_id: .., apic_id: .., flags: .. }`.
#[cfg(feature = "acpi")]
impl fmt::Display for ProcessorLocalApicRecord {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		// Copy the fields out first: references into a packed struct are not allowed.
		let acpi_processor_id = self.acpi_processor_id;
		let apic_id = self.apic_id;
		let flags = self.flags;

		write!(
			f,
			"{{ acpi_processor_id: {acpi_processor_id}, apic_id: {apic_id}, flags: {flags} }}"
		)
	}
}
231
/// Bit in `ProcessorLocalApicRecord::flags` marking the processor as enabled.
#[cfg(feature = "acpi")]
const CPU_FLAG_ENABLED: u32 = 1 << 0;
234
/// Payload of a MADT record with entry type 1 (I/O APIC).
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct IoApicRecord {
	id: u8,
	reserved: u8,
	/// Physical address of the IOAPIC registers.
	address: u32,
	global_system_interrupt_base: u32,
}
243
/// Renders the record as `{ id: .., reserved: .., address: 0x.., global_system_interrupt_base: .. }`.
#[cfg(feature = "acpi")]
impl fmt::Display for IoApicRecord {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		// Copy the fields out first: references into a packed struct are not allowed.
		let id = self.id;
		let reserved = self.reserved;
		let address = self.address;
		let global_system_interrupt_base = self.global_system_interrupt_base;

		write!(
			f,
			"{{ id: {id}, reserved: {reserved}, address: {address:#X}, global_system_interrupt_base: {global_system_interrupt_base} }}"
		)
	}
}
256
/// Interrupt handler for `TLB_FLUSH_INTERRUPT_NUMBER`, the IPI sent by `ipi_tlb_flush`.
///
/// Counts the interrupt, writes CR3 back with the value it already holds and
/// acknowledges the interrupt at the Local APIC.
#[cfg(feature = "smp")]
extern "x86-interrupt" fn tlb_flush_handler(stack_frame: interrupts::ExceptionStackFrame) {
	// NOTE(review): `swapgs` is called on entry and exit; presumably it switches the GS base
	// depending on the interrupted context — confirm against its definition.
	swapgs(&stack_frame);
	debug!("Received TLB Flush Interrupt");
	increment_irq_counter(TLB_FLUSH_INTERRUPT_NUMBER);
	// Reloading CR3 with its current value is what performs the flush on this core.
	let (frame, val) = Cr3::read_raw();
	unsafe {
		Cr3::write_raw(frame, val);
	}
	eoi();
	swapgs(&stack_frame);
}
269
/// Interrupt handler for `ERROR_INTERRUPT_NUMBER` (the vector programmed into the LVT Error register).
///
/// Logs the Error Status Register and the interrupted stack frame, acknowledges
/// the interrupt and then aborts via `scheduler::abort`.
extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) {
	swapgs(&stack_frame);
	error!("APIC LVT Error Interrupt");
	error!("ESR: {:#X}", local_apic_read(IA32_X2APIC_ESR));
	error!("{stack_frame:#?}");
	eoi();
	// No second `swapgs` here — presumably `scheduler::abort` does not return (TODO confirm).
	scheduler::abort();
}
278
/// Interrupt handler for `SPURIOUS_INTERRUPT_NUMBER` (the vector programmed into the SIVR).
///
/// Logs the interrupted stack frame and aborts via `scheduler::abort`.
/// No EOI is sent in this handler.
extern "x86-interrupt" fn spurious_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) {
	swapgs(&stack_frame);
	error!("Spurious Interrupt: {stack_frame:#?}");
	scheduler::abort();
}
284
/// Interrupt handler for `WAKEUP_INTERRUPT_NUMBER`, the IPI sent by `wakeup_core`.
///
/// Counts the interrupt, lets the core scheduler check its input, acknowledges
/// the interrupt and reschedules if the scheduler reports that it is scheduling.
#[cfg(feature = "smp")]
extern "x86-interrupt" fn wakeup_handler(stack_frame: interrupts::ExceptionStackFrame) {
	swapgs(&stack_frame);
	use crate::scheduler::PerCoreSchedulerExt;

	debug!("Received Wakeup Interrupt");
	increment_irq_counter(WAKEUP_INTERRUPT_NUMBER);
	let core_scheduler = core_scheduler();
	core_scheduler.check_input();
	// The interrupt is acknowledged before a possible reschedule.
	eoi();
	if core_scheduler.is_scheduling() {
		core_scheduler.reschedule();
	}
	swapgs(&stack_frame);
}
300
301#[inline]
302pub fn add_local_apic_id(id: u8) {
303	CPU_LOCAL_APIC_IDS.lock().push(id);
304}
305
/// Returns how many Local APIC IDs have been registered via `add_local_apic_id`.
#[cfg(feature = "smp")]
pub fn local_apic_id_count() -> u32 {
	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	apic_ids.len() as u32
}
310
311fn init_ioapic_address(phys_addr: PhysAddr) {
312	if env::is_uefi() {
313		// UEFI systems have already id mapped everything, so we can just set the physical address as the virtual one
314		IOAPIC_ADDRESS
315			.set(VirtAddr::new(phys_addr.as_u64()))
316			.unwrap();
317	} else {
318		let ioapic_address = virtualmem::allocate(BasePageSize::SIZE as usize).unwrap();
319		IOAPIC_ADDRESS.set(ioapic_address).unwrap();
320		debug!("Mapping IOAPIC at {phys_addr:p} to virtual address {ioapic_address:p}",);
321
322		let mut flags = PageTableEntryFlags::empty();
323		flags.device().writable().execute_disable();
324		paging::map::<BasePageSize>(ioapic_address, phys_addr, 1, flags);
325	}
326}
327
/// Fallback used when ACPI support is compiled out.
///
/// Always returns `Err(())` so that `init` continues with the next detection method.
#[cfg(not(feature = "acpi"))]
fn detect_from_acpi() -> Result<PhysAddr, ()> {
	// dummy implementation if acpi support is disabled
	Err(())
}
333
/// Detects CPUs and the IOAPIC from the ACPI Multiple APIC Description Table (MADT).
///
/// Every enabled Processor Local APIC record is registered via `add_local_apic_id`
/// and every I/O APIC record is passed to `init_ioapic_address`. Other record types
/// are ignored.
///
/// Returns the physical address of the Local APIC, or `Err(())` if no MADT is available.
/// A record whose length field is smaller than the record header is malformed; parsing
/// stops there (keeping what was found so far) instead of underflowing or looping forever.
#[cfg(feature = "acpi")]
fn detect_from_acpi() -> Result<PhysAddr, ()> {
	// Get the Multiple APIC Description Table (MADT) from the ACPI information and its specific table header.
	let madt = acpi::get_madt().ok_or(())?;
	let madt_header =
		unsafe { &*(ptr::with_exposed_provenance::<AcpiMadtHeader>(madt.table_start_address())) };

	// Jump to the actual table entries (after the table header).
	let mut current_address = madt.table_start_address() + mem::size_of::<AcpiMadtHeader>();

	// Loop through all table entries.
	while current_address < madt.table_end_address() {
		let record =
			unsafe { &*(ptr::with_exposed_provenance::<AcpiMadtRecordHeader>(current_address)) };

		// The length covers the record header as well, so anything shorter cannot be valid.
		let record_length = usize::from(record.length);
		if record_length < mem::size_of::<AcpiMadtRecordHeader>() {
			warn!("Malformed MADT record with length {record_length}, stopping MADT parsing");
			break;
		}

		// The record payload starts directly after the record header.
		let payload_address = current_address + mem::size_of::<AcpiMadtRecordHeader>();

		match record.entry_type {
			0 => {
				// Processor Local APIC
				let processor_local_apic_record = unsafe {
					&*(ptr::with_exposed_provenance::<ProcessorLocalApicRecord>(payload_address))
				};
				debug!("Found Processor Local APIC record: {processor_local_apic_record}");

				if processor_local_apic_record.flags & CPU_FLAG_ENABLED > 0 {
					add_local_apic_id(processor_local_apic_record.apic_id);
				}
			}
			1 => {
				// I/O APIC
				let ioapic_record =
					unsafe { &*(ptr::with_exposed_provenance::<IoApicRecord>(payload_address)) };
				debug!("Found I/O APIC record: {ioapic_record}");

				init_ioapic_address(PhysAddr::new(ioapic_record.address.into()));
			}
			_ => {
				// Just ignore other entries for now.
			}
		}

		// Advance to the next record.
		current_address += record_length;
	}

	// Successfully derived all information from the MADT.
	// Return the physical address of the Local APIC.
	Ok(PhysAddr::new(madt_header.local_apic_address.into()))
}
382
383/// Helper function to search Floating Pointer Structure of the Multiprocessing Specification
384fn search_mp_floating(memory_range: AddrRange<PhysAddr>) -> Result<&'static ApicMP, ()> {
385	let virtual_address = virtualmem::allocate(BasePageSize::SIZE as usize).map_err(|_| ())?;
386
387	for current_address in memory_range.iter().step_by(BasePageSize::SIZE as usize) {
388		let mut flags = PageTableEntryFlags::empty();
389		flags.normal().writable();
390		paging::map::<BasePageSize>(
391			virtual_address,
392			current_address.align_down(BasePageSize::SIZE),
393			1,
394			flags,
395		);
396
397		for i in 0..BasePageSize::SIZE / 4 {
398			let mut tmp: *const u32 = virtual_address.as_ptr();
399			tmp = unsafe { tmp.offset(i.try_into().unwrap()) };
400			let apic_mp = unsafe { &*tmp.cast::<ApicMP>() };
401			if apic_mp.signature == MP_FLT_SIGNATURE
402				&& !(apic_mp.version > 4 || apic_mp.features[0] != 0)
403			{
404				return Ok(apic_mp);
405			}
406		}
407	}
408
409	// frees obsolete virtual memory region for MMIO devices
410	virtualmem::deallocate(virtual_address, BasePageSize::SIZE as usize);
411
412	Err(())
413}
414
415/// Helper function to detect APIC by the Multiprocessor Specification
416fn detect_from_mp() -> Result<PhysAddr, ()> {
417	let mp_float = if let Ok(mpf) = search_mp_floating(
418		AddrRange::new(PhysAddr::new(0x9f000u64), PhysAddr::new(0xa0000u64)).unwrap(),
419	) {
420		Ok(mpf)
421	} else if let Ok(mpf) = search_mp_floating(
422		AddrRange::new(PhysAddr::new(0xf0000u64), PhysAddr::new(0x10_0000u64)).unwrap(),
423	) {
424		Ok(mpf)
425	} else {
426		Err(())
427	}?;
428
429	info!("Found MP config at {:#x}", { mp_float.mp_config });
430	info!(
431		"System uses Multiprocessing Specification 1.{}",
432		mp_float.version
433	);
434	info!("MP features 1: {}", mp_float.features[0]);
435
436	if mp_float.features[1] & 0x80 > 0 {
437		info!("PIC mode implemented");
438	} else {
439		info!("Virtual-Wire mode implemented");
440	}
441
442	let virtual_address = virtualmem::allocate(BasePageSize::SIZE as usize).map_err(|_| ())?;
443
444	let mut flags = PageTableEntryFlags::empty();
445	flags.normal().writable();
446	paging::map::<BasePageSize>(
447		virtual_address,
448		PhysAddr::from((mp_float.mp_config as usize).align_down(BasePageSize::SIZE as usize)),
449		1,
450		flags,
451	);
452
453	let mut addr: usize =
454		(virtual_address | (u64::from(mp_float.mp_config) & (BasePageSize::SIZE - 1))) as usize;
455	let mp_config: &ApicConfigTable = unsafe { &*(ptr::with_exposed_provenance(addr)) };
456	if mp_config.signature != MP_CONFIG_SIGNATURE {
457		warn!("Invalid MP config table");
458		virtualmem::deallocate(virtual_address, BasePageSize::SIZE as usize);
459		return Err(());
460	}
461
462	if mp_config.entry_count == 0 {
463		warn!("No MP table entries! Guess IO-APIC!");
464		let default_address = PhysAddr::new(0xfec0_0000);
465
466		init_ioapic_address(default_address);
467	} else {
468		// entries starts directly after the config table
469		addr += mem::size_of::<ApicConfigTable>();
470		for _i in 0..mp_config.entry_count {
471			match unsafe { *(ptr::with_exposed_provenance::<u8>(addr)) } {
472				// CPU entry
473				0 => {
474					let cpu_entry: &ApicProcessorEntry =
475						unsafe { &*(ptr::with_exposed_provenance(addr)) };
476					if cpu_entry.cpu_flags & 0x01 == 0x01 {
477						add_local_apic_id(cpu_entry.id);
478					}
479					addr += mem::size_of::<ApicProcessorEntry>();
480				}
481				// IO-APIC entry
482				2 => {
483					let io_entry: &ApicIoEntry = unsafe { &*(ptr::with_exposed_provenance(addr)) };
484					let ioapic = PhysAddr::new(io_entry.addr.into());
485					info!("Found IOAPIC at 0x{ioapic:p}");
486
487					init_ioapic_address(ioapic);
488
489					addr += mem::size_of::<ApicIoEntry>();
490				}
491				_ => {
492					addr += 8;
493				}
494			}
495		}
496	}
497
498	Ok(PhysAddr::new(mp_config.lapic.into()))
499}
500
501fn default_apic() -> PhysAddr {
502	warn!("Try to use default APIC address");
503
504	let default_address = PhysAddr::new(0xfee0_0000);
505
506	// currently, uhyve doesn't support an IO-APIC
507	if !env::is_uhyve() {
508		init_ioapic_address(default_address);
509	}
510
511	default_address
512}
513
/// Signals End of Interrupt to the Local APIC by writing `APIC_EOI_ACK` to its EOI register.
pub fn eoi() {
	local_apic_write(IA32_X2APIC_EOI, APIC_EOI_ACK);
}
517
/// Detects and initializes the Local APIC and, unless running on uhyve, the IOAPIC.
///
/// Steps, in order:
/// 1. Detect CPUs and APIC addresses via ACPI, then the MP tables, then default addresses.
/// 2. Enable x2APIC if supported; otherwise make the xAPIC register page accessible.
/// 3. Install the IDT gates for the APIC error and spurious interrupts (and, with `smp`,
///    the TLB flush and wakeup IPIs).
/// 4. Initialize the Local APIC and calibrate its timer if TSC-Deadline mode is unavailable.
/// 5. Initialize the IOAPIC.
pub fn init() {
	// Detect CPUs and APICs.
	let local_apic_physical_address = detect_from_acpi()
		.or_else(|()| detect_from_mp())
		.unwrap_or_else(|()| default_apic());

	// Initialize x2APIC or xAPIC, depending on what's available.
	init_x2apic();
	if !processor::supports_x2apic() {
		// We use the traditional xAPIC mode available on all x86-64 CPUs.
		// It uses a mapped page for communication.
		if env::is_uefi() {
			// Already identity-mapped on UEFI systems, just use the physical address as the virtual one.
			LOCAL_APIC_ADDRESS
				.set(VirtAddr::new(local_apic_physical_address.as_u64()))
				.unwrap();
		} else {
			let local_apic_address = virtualmem::allocate(BasePageSize::SIZE as usize).unwrap();
			LOCAL_APIC_ADDRESS.set(local_apic_address).unwrap();
			debug!(
				"Mapping Local APIC at {local_apic_physical_address:p} to virtual address {local_apic_address:p}"
			);

			// Device memory: writable and never executable.
			let mut flags = PageTableEntryFlags::empty();
			flags.device().writable().execute_disable();
			paging::map::<BasePageSize>(local_apic_address, local_apic_physical_address, 1, flags);
		}
	}

	// Set gates to ISRs for the APIC interrupts we are going to enable.
	unsafe {
		let mut idt = IDT.lock();
		idt[ERROR_INTERRUPT_NUMBER]
			.set_handler_fn(error_interrupt_handler)
			.set_stack_index(0);
		idt[SPURIOUS_INTERRUPT_NUMBER]
			.set_handler_fn(spurious_interrupt_handler)
			.set_stack_index(0);
		#[cfg(feature = "smp")]
		{
			idt[TLB_FLUSH_INTERRUPT_NUMBER]
				.set_handler_fn(tlb_flush_handler)
				.set_stack_index(0);
			// IRQ names are registered relative to vector 32.
			interrupts::add_irq_name(TLB_FLUSH_INTERRUPT_NUMBER - 32, "TLB flush");
			idt[WAKEUP_INTERRUPT_NUMBER]
				.set_handler_fn(wakeup_handler)
				.set_stack_index(0);
			interrupts::add_irq_name(WAKEUP_INTERRUPT_NUMBER - 32, "Wakeup");
		}
	}

	// Initialize interrupt handling over APIC.
	// All interrupts of the PIC have already been masked, so it doesn't need to be disabled again.
	init_local_apic();

	if !processor::supports_tsc_deadline() {
		// We have an older APIC Timer without TSC Deadline support, which has a maximum timeout
		// and needs to be calibrated.
		calibrate_timer();
	}

	// currently, IO-APIC isn't supported by uhyve
	if !env::is_uhyve() {
		// initialize IO-APIC
		init_ioapic();
	}
}
585
586fn init_ioapic() {
587	let max_entry = ioapic_max_redirection_entry() + 1;
588	info!("IOAPIC v{} has {} entries", ioapic_version(), max_entry);
589
590	// now lets turn everything else on
591	for i in 0..max_entry {
592		// Turn off the Programmable Interrupt Timer Interrupt (IRQ 0) and
593		// the Real Time Clock (IRQ 2).
594		let enabled = !matches!(i, 0 | 2);
595		ioapic_set_interrupt(i, 0, enabled);
596	}
597}
598
599fn ioapic_set_interrupt(irq: u8, apicid: u8, enabled: bool) {
600	assert!(irq <= 24);
601
602	let off = u32::from(irq * 2);
603	let ioredirect_upper = u32::from(apicid) << 24;
604	let mut ioredirect_lower = u32::from(0x20 + irq);
605	if !enabled {
606		debug!("Disabling irq {irq}");
607		ioredirect_lower |= 1 << 16;
608	}
609
610	ioapic_write(IOAPIC_REG_TABLE + off, ioredirect_lower);
611	ioapic_write(IOAPIC_REG_TABLE + off + 1, ioredirect_upper);
612}
613
614pub fn init_local_apic() {
615	// Mask out all interrupts we don't need right now.
616	local_apic_write(IA32_X2APIC_LVT_TIMER, APIC_LVT_MASK);
617	local_apic_write(IA32_X2APIC_LVT_THERMAL, APIC_LVT_MASK);
618	local_apic_write(IA32_X2APIC_LVT_PMI, APIC_LVT_MASK);
619	local_apic_write(IA32_X2APIC_LVT_LINT0, APIC_LVT_MASK);
620	local_apic_write(IA32_X2APIC_LVT_LINT1, APIC_LVT_MASK);
621
622	// Set the interrupt number of the Error interrupt.
623	local_apic_write(IA32_X2APIC_LVT_ERROR, u64::from(ERROR_INTERRUPT_NUMBER));
624
625	// allow all interrupts
626	local_apic_write(IA32_X2APIC_TPR, 0x00);
627
628	// Finally, enable the Local APIC by setting the interrupt number for spurious interrupts
629	// and providing the enable bit.
630	local_apic_write(
631		IA32_X2APIC_SIVR,
632		APIC_SIVR_ENABLED | (u64::from(SPURIOUS_INTERRUPT_NUMBER)),
633	);
634}
635
636fn calibrate_timer() {
637	// The APIC Timer is used to provide a one-shot interrupt for the tickless timer
638	// implemented through processor::get_timer_ticks.
639	// Therefore determine a counter value for 1 microsecond, which is the resolution
640	// used throughout all of Hermit. Wait 30ms for accuracy.
641	let microseconds = 30_000;
642
643	// Be sure that all interrupts for calibration accuracy and initialize the counter are disabled.
644	// Dividing the counter value by 8 still provides enough accuracy for 1 microsecond resolution,
645	// but allows for longer timeouts than a smaller divisor.
646	// For example, on an Intel Xeon E5-2650 v3 @ 2.30GHz, the counter is usually calibrated to
647	// 125, which allows for timeouts of approximately 34 seconds (u32::MAX / 125).
648
649	local_apic_write(IA32_X2APIC_DIV_CONF, APIC_DIV_CONF_DIVIDE_BY_8);
650	local_apic_write(IA32_X2APIC_INIT_COUNT, u64::from(u32::MAX));
651
652	// Wait until the calibration time has elapsed.
653	processor::udelay(microseconds);
654
655	// Save the difference of the initial value and current value as the result of the calibration
656	// and re-enable interrupts.
657	let calibrated_counter_value =
658		(u64::from(u32::MAX - local_apic_read(IA32_X2APIC_CUR_COUNT))) / microseconds;
659	CALIBRATED_COUNTER_VALUE
660		.set(calibrated_counter_value)
661		.unwrap();
662	debug!(
663		"Calibrated APIC Timer with a counter value of {calibrated_counter_value} for 1 microsecond",
664	);
665}
666
667fn __set_oneshot_timer(wakeup_time: Option<u64>) {
668	if let Some(wt) = wakeup_time {
669		if processor::supports_tsc_deadline() {
670			// wt is the absolute wakeup time in microseconds based on processor::get_timer_ticks.
671			// We can simply multiply it by the processor frequency to get the absolute Time-Stamp Counter deadline
672			// (see processor::get_timer_ticks).
673			let tsc_deadline = wt * (u64::from(processor::get_frequency()));
674
675			// Enable the APIC Timer in TSC-Deadline Mode and let it start by writing to the respective MSR.
676			local_apic_write(
677				IA32_X2APIC_LVT_TIMER,
678				APIC_LVT_TIMER_TSC_DEADLINE | u64::from(TIMER_INTERRUPT_NUMBER),
679			);
680			let mut ia32_tsc_deadline = IA32_TSC_DEADLINE;
681			unsafe {
682				ia32_tsc_deadline.write(tsc_deadline);
683			}
684		} else {
685			// Calculate the relative timeout from the absolute wakeup time.
686			// Maintain a minimum value of one tick, otherwise the timer interrupt does not fire at all.
687			// The Timer Counter Register is also a 32-bit register, which we must not overflow for longer timeouts.
688			let current_time = processor::get_timer_ticks();
689			let ticks = if wt > current_time {
690				wt - current_time
691			} else {
692				1
693			};
694			let init_count = cmp::min(
695				CALIBRATED_COUNTER_VALUE.get().unwrap() * ticks,
696				u64::from(u32::MAX),
697			);
698
699			// Enable the APIC Timer in One-Shot Mode and let it start by setting the initial counter value.
700			local_apic_write(IA32_X2APIC_LVT_TIMER, u64::from(TIMER_INTERRUPT_NUMBER));
701			local_apic_write(IA32_X2APIC_INIT_COUNT, init_count);
702		}
703	} else {
704		// Disable the APIC Timer.
705		local_apic_write(IA32_X2APIC_LVT_TIMER, APIC_LVT_MASK);
706	}
707}
708
709pub fn set_oneshot_timer(wakeup_time: Option<u64>) {
710	without_interrupts(|| {
711		__set_oneshot_timer(wakeup_time);
712	});
713}
714
715pub fn init_x2apic() {
716	if processor::supports_x2apic() {
717		debug!("Enable x2APIC support");
718		// The CPU supports the modern x2APIC mode, which uses MSRs for communication.
719		// Enable it.
720		let mut msr = IA32_APIC_BASE;
721		let mut apic_base = unsafe { msr.read() };
722		apic_base |= X2APIC_ENABLE;
723		unsafe {
724			msr.write(apic_base);
725		}
726	}
727}
728
729/// Initialize the required _start variables for the next CPU to be booted.
730pub fn init_next_processor_variables() {
731	// Allocate stack for the CPU and pass the addresses.
732	let layout = Layout::from_size_align(KERNEL_STACK_SIZE, BasePageSize::SIZE as usize).unwrap();
733	let stack = unsafe { alloc(layout) };
734	assert!(!stack.is_null());
735	CURRENT_STACK_ADDRESS.store(stack, Ordering::Relaxed);
736}
737
/// Boot all Application Processors
/// This algorithm is derived from Intel MultiProcessor Specification 1.4, B.4, but testing has shown
/// that a second STARTUP IPI and setting the BIOS Reset Vector are no longer necessary.
/// This is partly confirmed by <https://wiki.osdev.org/Symmetric_Multiprocessing>
///
/// The boot code is copied to `SMP_BOOT_CODE_ADDRESS` and patched with the PML4 address,
/// the entry point and, per core, the ID of the core to boot. Each core other than the
/// current one is then started with an INIT IPI (assert, then de-assert) followed by a
/// single STARTUP IPI, and this function spins until the processor count has changed.
///
/// # Panics
///
/// Panics if the boot code does not fit into one page, if the PML4 address (including
/// the CR3 flag bits) does not fit into 32 bits, or (on UEFI) if the boot code address
/// is not identity-mapped.
#[cfg(all(target_os = "none", feature = "smp"))]
pub fn boot_application_processors() {
	use core::hint;

	use x86_64::structures::paging::Translate;

	use super::start;

	let smp_boot_code = include_bytes!(concat!(core::env!("OUT_DIR"), "/boot.bin"));

	// We shouldn't have any problems fitting the boot code into a single page, but let's better be sure.
	assert!(
		smp_boot_code.len() < BasePageSize::SIZE as usize,
		"SMP Boot Code is larger than a page"
	);
	debug!("SMP boot code is {} bytes long", smp_boot_code.len());

	if env::is_uefi() {
		// Since UEFI already provides identity-mapped pagetables, we only have to sanity-check the identity mapping
		let pt = unsafe { crate::arch::mm::paging::identity_mapped_page_table() };
		let virt_addr = SMP_BOOT_CODE_ADDRESS;
		let phys_addr = pt.translate_addr(virt_addr.into()).unwrap();
		assert_eq!(phys_addr.as_u64(), virt_addr.as_u64());
	} else {
		// Identity-map the boot code page and copy over the code.
		debug!("Mapping SMP boot code to physical and virtual address {SMP_BOOT_CODE_ADDRESS:p}");
		let mut flags = PageTableEntryFlags::empty();
		flags.normal().writable();
		paging::map::<BasePageSize>(
			SMP_BOOT_CODE_ADDRESS,
			PhysAddr::new(SMP_BOOT_CODE_ADDRESS.as_u64()),
			1,
			flags,
		);
	}
	// Copy the boot code to its (identity-mapped) destination.
	unsafe {
		ptr::copy_nonoverlapping(
			smp_boot_code.as_ptr(),
			SMP_BOOT_CODE_ADDRESS.as_mut_ptr(),
			smp_boot_code.len(),
		);
	}

	unsafe {
		let (frame, val) = Cr3::read_raw();
		let value = frame.start_address().as_u64() | u64::from(val);
		// Pass the PML4 page table address to the boot code.
		// The slot is 32 bits wide, hence the checked conversion.
		*((SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_PML4).as_mut_ptr::<u32>()) =
			value.try_into().unwrap();
		// Set entry point
		debug!(
			"Set entry point for application processor to {:p}",
			start::_start as *const ()
		);
		// Written with `write_unaligned`, so no alignment of the entry slot is assumed.
		ptr::write_unaligned(
			(SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_ENTRY).as_mut_ptr(),
			start::_start as usize,
		);
	}

	// Now wake up each application processor.
	// NOTE: the lock on the APIC ID list is held for the whole boot sequence.
	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	let core_id = core_id();

	for (core_id_to_boot, &apic_id) in apic_ids.iter().enumerate() {
		let core_id_to_boot = core_id_to_boot as u32;
		if core_id_to_boot != core_id {
			// Tell the boot code which Core ID the processor is going to get.
			unsafe {
				*((SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_CPU_ID).as_mut_ptr()) =
					core_id_to_boot;
			}
			// The destination APIC ID is passed in bits 32.. of the ICR value
			// (`local_apic_write` converts this for xAPIC mode).
			let destination = u64::from(apic_id) << 32;

			debug!("Waking up CPU {core_id_to_boot} with Local APIC ID {apic_id}");
			init_next_processor_variables();

			// Save the current number of initialized CPUs.
			let current_processor_count = arch::get_processor_count();

			// Send an INIT IPI.
			local_apic_write(
				IA32_X2APIC_ICR,
				destination
					| APIC_ICR_LEVEL_TRIGGERED
					| APIC_ICR_LEVEL_ASSERT
					| APIC_ICR_DELIVERY_MODE_INIT,
			);
			processor::udelay(200);

			// Send the INIT IPI again without the assert bit (de-assert).
			local_apic_write(
				IA32_X2APIC_ICR,
				destination | APIC_ICR_LEVEL_TRIGGERED | APIC_ICR_DELIVERY_MODE_INIT,
			);
			processor::udelay(10000);

			// Send a STARTUP IPI.
			// The vector field carries the page number of the boot code.
			local_apic_write(
				IA32_X2APIC_ICR,
				destination
					| APIC_ICR_DELIVERY_MODE_STARTUP
					| ((SMP_BOOT_CODE_ADDRESS.as_u64()) >> 12),
			);
			debug!("Waiting for it to respond");

			// Wait until the application processor has finished initializing.
			// It will indicate this by counting up cpu_online.
			while current_processor_count == arch::get_processor_count() {
				hint::spin_loop();
			}
		}
	}

	print_information();
}
856
/// Asks all other cores to flush their TLB by sending them the TLB flush IPI.
///
/// Does nothing while only one processor is initialized.
#[cfg(feature = "smp")]
pub fn ipi_tlb_flush() {
	if arch::get_processor_count() <= 1 {
		return;
	}

	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	let this_core = core_id();

	// Ensure that all memory operations have completed before issuing a TLB flush.
	unsafe {
		_mm_mfence();
	}

	// Send an IPI with our TLB Flush interrupt number to all other CPUs.
	without_interrupts(|| {
		apic_ids
			.iter()
			.enumerate()
			.filter(|&(index, _)| index != usize::try_from(this_core).unwrap())
			.for_each(|(_, &apic_id)| {
				let destination = u64::from(apic_id) << 32;
				local_apic_write(
					IA32_X2APIC_ICR,
					destination
						| APIC_ICR_LEVEL_ASSERT
						| APIC_ICR_DELIVERY_MODE_FIXED
						| u64::from(TLB_FLUSH_INTERRUPT_NUMBER),
				);
			});
	});
}
885
886/// Send an inter-processor interrupt to wake up a CPU Core that is in a HALT state.
887#[allow(unused_variables)]
888pub fn wakeup_core(core_id_to_wakeup: CoreId) {
889	#[cfg(all(feature = "smp", not(feature = "idle-poll")))]
890	if core_id_to_wakeup != core_id()
891		&& !crate::processor::supports_mwait()
892		&& crate::scheduler::take_core_hlt_state(core_id_to_wakeup)
893	{
894		without_interrupts(|| {
895			let apic_ids = CPU_LOCAL_APIC_IDS.lock();
896			let local_apic_id = apic_ids[core_id_to_wakeup as usize];
897			let destination = u64::from(local_apic_id) << 32;
898			local_apic_write(
899				IA32_X2APIC_ICR,
900				destination
901					| APIC_ICR_LEVEL_ASSERT
902					| APIC_ICR_DELIVERY_MODE_FIXED
903					| u64::from(WAKEUP_INTERRUPT_NUMBER),
904			);
905		});
906	}
907}
908
909/// Translate the x2APIC MSR into an xAPIC memory address.
910#[inline]
911fn translate_x2apic_msr_to_xapic_address(x2apic_msr: u32) -> VirtAddr {
912	*LOCAL_APIC_ADDRESS.get().unwrap() + ((u64::from(x2apic_msr) & 0xff) << 4)
913}
914
915fn local_apic_read(x2apic_msr: u32) -> u32 {
916	if processor::supports_x2apic() {
917		// x2APIC is simple, we can just read from the given MSR.
918		unsafe { Msr::new(x2apic_msr).read() as u32 }
919	} else {
920		unsafe { *(translate_x2apic_msr_to_xapic_address(x2apic_msr).as_ptr::<u32>()) }
921	}
922}
923
924fn ioapic_write(reg: u32, value: u32) {
925	unsafe {
926		core::ptr::write_volatile(IOAPIC_ADDRESS.get().unwrap().as_mut_ptr::<u32>(), reg);
927		core::ptr::write_volatile(
928			(*IOAPIC_ADDRESS.get().unwrap() + 4 * mem::size_of::<u32>()).as_mut_ptr::<u32>(),
929			value,
930		);
931	}
932}
933
934fn ioapic_read(reg: u32) -> u32 {
935	let value;
936
937	unsafe {
938		core::ptr::write_volatile(IOAPIC_ADDRESS.get().unwrap().as_mut_ptr::<u32>(), reg);
939		value = core::ptr::read_volatile(
940			(*IOAPIC_ADDRESS.get().unwrap() + 4 * mem::size_of::<u32>()).as_ptr::<u32>(),
941		);
942	}
943
944	value
945}
946
947fn ioapic_version() -> u32 {
948	ioapic_read(IOAPIC_REG_VER) & 0xff
949}
950
951fn ioapic_max_redirection_entry() -> u8 {
952	((ioapic_read(IOAPIC_REG_VER) >> 16) & 0xff) as u8
953}
954
955fn local_apic_write(x2apic_msr: u32, value: u64) {
956	if processor::supports_x2apic() {
957		// x2APIC is simple, we can just write the given value to the given MSR.
958		unsafe {
959			Msr::new(x2apic_msr).write(value);
960		}
961	} else {
962		// Write the value.
963		let value_ref = unsafe {
964			&mut *(translate_x2apic_msr_to_xapic_address(x2apic_msr).as_mut_ptr::<u32>())
965		};
966
967		if x2apic_msr == IA32_X2APIC_ICR {
968			// The ICR1 register in xAPIC mode also has a Delivery Status bit.
969			// Wait until previous interrupt was delivered.
970			// This bit does not exist in x2APIC mode (cf. Intel Vol. 3A, 10.12.9).
971			while (unsafe { core::ptr::read_volatile(value_ref) }
972				& APIC_ICR_DELIVERY_STATUS_PENDING)
973				> 0
974			{
975				spin_loop();
976			}
977
978			// Instead of a single 64-bit ICR register, xAPIC has two 32-bit registers (ICR1 and ICR2).
979			// There is a gap between them and the destination field in ICR2 is also 8 bits instead of 32 bits.
980			let destination = ((value >> 8) & 0xff00_0000) as u32;
981			let icr2 = unsafe {
982				&mut *((*LOCAL_APIC_ADDRESS.get().unwrap() + APIC_ICR2).as_mut_ptr::<u32>())
983			};
984			*icr2 = destination;
985
986			// The remaining data without the destination will now be written into ICR1.
987		}
988
989		*value_ref = value as u32;
990	}
991}
992
993pub fn print_information() {
994	infoheader!(" MULTIPROCESSOR INFORMATION ");
995	infoentry!(
996		"APIC in use",
997		if processor::supports_x2apic() {
998			"x2APIC"
999		} else {
1000			"xAPIC"
1001		}
1002	);
1003	infoentry!("Initialized CPUs", arch::get_processor_count());
1004	infofooter!();
1005}