hermit/scheduler/mod.rs

#![allow(clippy::type_complexity)]

use alloc::boxed::Box;
use alloc::collections::{BTreeMap, VecDeque};
use alloc::rc::Rc;
use alloc::sync::Arc;
#[cfg(feature = "smp")]
use alloc::vec::Vec;
use core::cell::RefCell;
use core::future::{self, Future};
use core::ptr;
#[cfg(all(target_arch = "x86_64", feature = "smp"))]
use core::sync::atomic::AtomicBool;
use core::sync::atomic::{AtomicI32, AtomicU32, Ordering};
use core::task::Poll::Ready;
use core::task::ready;

use ahash::RandomState;
use crossbeam_utils::Backoff;
use hashbrown::HashMap;
use hermit_sync::*;
#[cfg(target_arch = "riscv64")]
use riscv::register::sstatus;

use crate::arch::core_local::*;
#[cfg(target_arch = "riscv64")]
use crate::arch::switch::switch_to_task;
#[cfg(target_arch = "x86_64")]
use crate::arch::switch::{switch_to_fpu_owner, switch_to_task};
use crate::arch::{get_processor_count, interrupts};
use crate::fd::{FileDescriptor, ObjectInterface};
use crate::kernel::scheduler::TaskStacks;
use crate::scheduler::task::*;
use crate::{arch, io};

pub mod task;

static NO_TASKS: AtomicU32 = AtomicU32::new(0);
/// Map between core ID and the per-core scheduler input queue
#[cfg(feature = "smp")]
static SCHEDULER_INPUTS: SpinMutex<Vec<&InterruptTicketMutex<SchedulerInput>>> =
	SpinMutex::new(Vec::new());
#[cfg(all(target_arch = "x86_64", feature = "smp"))]
static CORE_HLT_STATE: SpinMutex<Vec<&AtomicBool>> = SpinMutex::new(Vec::new());
/// Map between Task ID and Queue of waiting tasks
static WAITING_TASKS: InterruptTicketMutex<BTreeMap<TaskId, VecDeque<TaskHandle>>> =
	InterruptTicketMutex::new(BTreeMap::new());
/// Map between Task ID and TaskHandle
static TASKS: InterruptTicketMutex<BTreeMap<TaskId, TaskHandle>> =
	InterruptTicketMutex::new(BTreeMap::new());

/// Unique identifier for a core.
pub type CoreId = u32;

#[cfg(feature = "smp")]
pub(crate) struct SchedulerInput {
	/// Queue of new tasks
	new_tasks: VecDeque<NewTask>,
	/// Queue of tasks that were woken up by another core
	wakeup_tasks: VecDeque<TaskHandle>,
}

#[cfg(feature = "smp")]
impl SchedulerInput {
	pub fn new() -> Self {
		Self {
			new_tasks: VecDeque::new(),
			wakeup_tasks: VecDeque::new(),
		}
	}
}

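// The per-core scheduler is aligned to a full cache line (128 bytes on x86_64 and
// aarch64, 64 bytes elsewhere), presumably so that the frequently accessed state of
// different cores' schedulers does not end up sharing cache lines (false sharing).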
#[cfg_attr(any(target_arch = "x86_64", target_arch = "aarch64"), repr(align(128)))]
#[cfg_attr(
	not(any(target_arch = "x86_64", target_arch = "aarch64")),
	repr(align(64))
)]
pub(crate) struct PerCoreScheduler {
	/// Core ID of this per-core scheduler
	#[cfg(feature = "smp")]
	core_id: CoreId,
	/// Task which is currently running
	current_task: Rc<RefCell<Task>>,
	/// Idle Task
	idle_task: Rc<RefCell<Task>>,
	/// Task that currently owns the FPU
	#[cfg(target_arch = "x86_64")]
	fpu_owner: Rc<RefCell<Task>>,
	/// Queue of tasks that are ready to run
	ready_queue: PriorityTaskQueue,
	/// Queue of tasks that are finished and can be released
	finished_tasks: VecDeque<Rc<RefCell<Task>>>,
	/// Queue of blocked tasks, sorted by wakeup time.
	blocked_tasks: BlockedTaskQueue,
}

pub(crate) trait PerCoreSchedulerExt {
	/// Triggers the scheduler to reschedule the tasks.
	/// The interrupt flag will be cleared during the reschedule.
	fn reschedule(self);

	#[cfg(any(feature = "tcp", feature = "udp"))]
	fn add_network_timer(self, wakeup_time: Option<u64>);

	/// Terminate the current task on the current core.
	fn exit(self, exit_code: i32) -> !;
}

impl PerCoreSchedulerExt for &mut PerCoreScheduler {
	#[cfg(target_arch = "x86_64")]
	fn reschedule(self) {
		without_interrupts(|| {
			if let Some(last_stack_pointer) = self.scheduler() {
				let (new_stack_pointer, is_idle) = {
					let borrowed = self.current_task.borrow();
					(
						borrowed.last_stack_pointer,
						borrowed.status == TaskStatus::Idle,
					)
				};

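				// If the task we are switching to is the idle task or already owns the
				// FPU state, the cheaper `switch_to_fpu_owner` path can be used, which
				// presumably avoids arming the lazy FPU state switch; otherwise fall
				// back to the generic `switch_to_task`.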
				if is_idle || Rc::ptr_eq(&self.current_task, &self.fpu_owner) {
					unsafe {
						switch_to_fpu_owner(
							last_stack_pointer,
							new_stack_pointer.as_u64() as usize,
						);
					}
				} else {
					unsafe {
						switch_to_task(last_stack_pointer, new_stack_pointer.as_u64() as usize);
					}
				}
			}
		});
	}

	/// Trigger an interrupt to reschedule the system
	#[cfg(target_arch = "aarch64")]
	fn reschedule(self) {
		use core::arch::asm;

		use arm_gic::IntId;
		use arm_gic::gicv3::{GicV3, SgiTarget};

		use crate::interrupts::SGI_RESCHED;

		unsafe {
			asm!("dsb nsh", "isb", options(nostack, nomem, preserves_flags));
		}

		let reschedid = IntId::sgi(SGI_RESCHED.into());
		GicV3::send_sgi(
			reschedid,
			SgiTarget::List {
				affinity3: 0,
				affinity2: 0,
				affinity1: 0,
				target_list: 0b1,
			},
		);

		interrupts::enable();
	}

	#[cfg(target_arch = "riscv64")]
	fn reschedule(self) {
		without_interrupts(|| self.scheduler());
	}

	#[cfg(any(feature = "tcp", feature = "udp"))]
	fn add_network_timer(self, wakeup_time: Option<u64>) {
		without_interrupts(|| {
			self.blocked_tasks.add_network_timer(wakeup_time);
		});
	}

	fn exit(self, exit_code: i32) -> ! {
		without_interrupts(|| {
			// Get the current task.
			let mut current_task_borrowed = self.current_task.borrow_mut();
			assert_ne!(
				current_task_borrowed.status,
				TaskStatus::Idle,
				"Trying to terminate the idle task"
			);

			// Finish the task and reschedule.
			debug!(
				"Finishing task {} with exit code {}",
				current_task_borrowed.id, exit_code
			);
			current_task_borrowed.status = TaskStatus::Finished;
			NO_TASKS.fetch_sub(1, Ordering::SeqCst);

			let current_id = current_task_borrowed.id;
			drop(current_task_borrowed);

			// Wake up all tasks that are waiting for the task with this identifier.
			if let Some(mut queue) = WAITING_TASKS.lock().remove(&current_id) {
				while let Some(task) = queue.pop_front() {
					self.custom_wakeup(task);
				}
			}
		});

		self.reschedule();
		unreachable!()
	}
}

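/// Bundles all parameters needed to create a new [`Task`], so that the task itself
/// can be constructed on the core that will actually run it.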
struct NewTask {
	tid: TaskId,
	func: unsafe extern "C" fn(usize),
	arg: usize,
	prio: Priority,
	core_id: CoreId,
	stacks: TaskStacks,
	object_map:
		Arc<async_lock::RwLock<HashMap<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>>>,
}

impl From<NewTask> for Task {
	fn from(value: NewTask) -> Self {
		let NewTask {
			tid,
			func,
			arg,
			prio,
			core_id,
			stacks,
			object_map,
		} = value;
		let mut task = Self::new(tid, core_id, TaskStatus::Ready, prio, stacks, object_map);
		task.create_stack_frame(func, arg);
		task
	}
}

impl PerCoreScheduler {
	/// Spawn a new task.
	pub unsafe fn spawn(
		func: unsafe extern "C" fn(usize),
		arg: usize,
		prio: Priority,
		core_id: CoreId,
		stack_size: usize,
	) -> TaskId {
		// Create the new task.
		let tid = get_tid();
		let stacks = TaskStacks::new(stack_size);
		let new_task = NewTask {
			tid,
			func,
			arg,
			prio,
			core_id,
			stacks,
			object_map: core_scheduler().get_current_task_object_map(),
		};

		// Add it to the task lists.
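		// With SMP enabled, the target core's input queue is locked first; then the
		// task is registered in the global tables. `wakeup` becomes true only if the
		// task was handed over to another core, which then has to be woken up below.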
		let wakeup = {
			#[cfg(feature = "smp")]
			let mut input_locked = get_scheduler_input(core_id).lock();
			WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
			TASKS.lock().insert(
				tid,
				TaskHandle::new(
					tid,
					prio,
					#[cfg(feature = "smp")]
					core_id,
				),
			);
			NO_TASKS.fetch_add(1, Ordering::SeqCst);

			#[cfg(feature = "smp")]
			if core_id == core_scheduler().core_id {
				let task = Rc::new(RefCell::new(Task::from(new_task)));
				core_scheduler().ready_queue.push(task);
				false
			} else {
				input_locked.new_tasks.push_back(new_task);
				true
			}
			#[cfg(not(feature = "smp"))]
			if core_id == 0 {
				let task = Rc::new(RefCell::new(Task::from(new_task)));
				core_scheduler().ready_queue.push(task);
				false
			} else {
				panic!("Invalid core_id {}!", core_id)
			}
		};

		debug!("Creating task {tid} with priority {prio} on core {core_id}");

		if wakeup {
			arch::wakeup_core(core_id);
		}

		tid
	}

	#[cfg(feature = "newlib")]
	fn clone_impl(&self, func: extern "C" fn(usize), arg: usize) -> TaskId {
		static NEXT_CORE_ID: AtomicU32 = AtomicU32::new(1);

		// Get the Core ID of the next CPU.
		let core_id: CoreId = {
			// Increase the CPU number by 1.
			let id = NEXT_CORE_ID.fetch_add(1, Ordering::SeqCst);

			// Check for overflow.
			if id == arch::get_processor_count() {
				NEXT_CORE_ID.store(0, Ordering::SeqCst);
				0
			} else {
				id
			}
		};

		// Get the current task.
		let current_task_borrowed = self.current_task.borrow();

		// Clone the current task.
		let tid = get_tid();
		let clone_task = NewTask {
			tid,
			func,
			arg,
			prio: current_task_borrowed.prio,
			core_id,
			stacks: TaskStacks::new(current_task_borrowed.stacks.get_user_stack_size()),
			object_map: current_task_borrowed.object_map.clone(),
		};

		// Add it to the task lists.
		let wakeup = {
			#[cfg(feature = "smp")]
			let mut input_locked = get_scheduler_input(core_id).lock();
			WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
			TASKS.lock().insert(
				tid,
				TaskHandle::new(
					tid,
					current_task_borrowed.prio,
					#[cfg(feature = "smp")]
					core_id,
				),
			);
			NO_TASKS.fetch_add(1, Ordering::SeqCst);
			#[cfg(feature = "smp")]
			if core_id == core_scheduler().core_id {
				let clone_task = Rc::new(RefCell::new(Task::from(clone_task)));
				core_scheduler().ready_queue.push(clone_task);
				false
			} else {
				input_locked.new_tasks.push_back(clone_task);
				true
			}
			#[cfg(not(feature = "smp"))]
			if core_id == 0 {
				let clone_task = Rc::new(RefCell::new(Task::from(clone_task)));
				core_scheduler().ready_queue.push(clone_task);
				false
			} else {
				panic!("Invalid core_id {}!", core_id);
			}
		};

		// Wake up the CPU
		if wakeup {
			arch::wakeup_core(core_id);
		}

		tid
	}

	#[cfg(feature = "newlib")]
	pub fn clone(&self, func: extern "C" fn(usize), arg: usize) -> TaskId {
		without_interrupts(|| self.clone_impl(func, arg))
	}

	/// Returns `true` if a reschedule is required
	#[inline]
	#[cfg(all(any(target_arch = "x86_64", target_arch = "riscv64"), feature = "smp"))]
	pub fn is_scheduling(&self) -> bool {
		self.current_task.borrow().prio < self.ready_queue.get_highest_priority()
	}

	#[inline]
	pub fn handle_waiting_tasks(&mut self) {
		without_interrupts(|| {
			crate::executor::run();
			for task in self.blocked_tasks.handle_waiting_tasks() {
				self.ready_queue.push(task);
			}
		});
	}

	#[cfg(not(feature = "smp"))]
	pub fn custom_wakeup(&mut self, task: TaskHandle) {
		without_interrupts(|| {
			let task = self.blocked_tasks.custom_wakeup(task);
			self.ready_queue.push(task);
		});
	}

	#[cfg(feature = "smp")]
	pub fn custom_wakeup(&mut self, task: TaskHandle) {
		if task.get_core_id() == self.core_id {
			without_interrupts(|| {
				let task = self.blocked_tasks.custom_wakeup(task);
				self.ready_queue.push(task);
			});
		} else {
			get_scheduler_input(task.get_core_id())
				.lock()
				.wakeup_tasks
				.push_back(task);
			// Wake up the CPU
			arch::wakeup_core(task.get_core_id());
		}
	}

	#[inline]
	pub fn block_current_task(&mut self, wakeup_time: Option<u64>) {
		without_interrupts(|| {
			self.blocked_tasks
				.add(self.current_task.clone(), wakeup_time);
		});
	}

	#[inline]
	pub fn get_current_task_handle(&self) -> TaskHandle {
		without_interrupts(|| {
			let current_task_borrowed = self.current_task.borrow();

			TaskHandle::new(
				current_task_borrowed.id,
				current_task_borrowed.prio,
				#[cfg(feature = "smp")]
				current_task_borrowed.core_id,
			)
		})
	}

	#[inline]
	pub fn get_current_task_id(&self) -> TaskId {
		without_interrupts(|| self.current_task.borrow().id)
	}

	#[inline]
	pub fn get_current_task_object_map(
		&self,
	) -> Arc<async_lock::RwLock<HashMap<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>>> {
		without_interrupts(|| self.current_task.borrow().object_map.clone())
	}

	/// Maps a file descriptor to its IO interface and returns
	/// the shared reference.
	#[inline]
	pub async fn get_object(&self, fd: FileDescriptor) -> io::Result<Arc<dyn ObjectInterface>> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.read());

				let guard = ready!(pinned_obj.as_mut().poll(cx));
				Ready(guard.get(&fd).cloned().ok_or(io::Error::EBADF))
			})
		})
		.await
	}

	/// Creates a new map between file descriptors and their IO interfaces and
	/// clones the standard descriptors.
	#[allow(dead_code)]
	pub async fn recreate_objmap(&self) -> io::Result<()> {
		let mut map = HashMap::<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>::with_hasher(
			RandomState::with_seeds(0, 0, 0, 0),
		);

		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.read());

				let guard = ready!(pinned_obj.as_mut().poll(cx));
				// Clone the standard file descriptors.
				for i in 0..3 {
					if let Some(obj) = guard.get(&i) {
						map.insert(i, obj.clone());
					}
				}

				Ready(io::Result::Ok(()))
			})
		})
		.await?;

		without_interrupts(|| {
			self.current_task.borrow_mut().object_map = Arc::new(async_lock::RwLock::new(map));
		});

		Ok(())
	}

	/// Inserts a new IO interface and returns a file descriptor as
	/// identifier for this object.
	pub async fn insert_object(&self, obj: Arc<dyn ObjectInterface>) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());

				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
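				// Search for the lowest file descriptor that is not yet in use.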
				let new_fd = || -> io::Result<FileDescriptor> {
					let mut fd: FileDescriptor = 0;
					loop {
						if !guard.contains_key(&fd) {
							break Ok(fd);
						} else if fd == FileDescriptor::MAX {
							break Err(io::Error::EOVERFLOW);
						}

						fd = fd.saturating_add(1);
					}
				};

				let fd = new_fd()?;
				let _ = guard.insert(fd, obj.clone());
				Ready(Ok(fd))
			})
		})
		.await
	}

	/// Duplicates an IO interface and returns a new file descriptor as
	/// identifier for the new copy.
	pub async fn dup_object(&self, fd: FileDescriptor) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());

				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				let obj = (*(guard.get(&fd).ok_or(io::Error::EINVAL)?)).clone();

				let new_fd = || -> io::Result<FileDescriptor> {
					let mut fd: FileDescriptor = 0;
					loop {
						if !guard.contains_key(&fd) {
							break Ok(fd);
						} else if fd == FileDescriptor::MAX {
							break Err(io::Error::EOVERFLOW);
						}

						fd = fd.saturating_add(1);
					}
				};

				let fd = new_fd()?;
				if guard.try_insert(fd, obj).is_err() {
					Ready(Err(io::Error::EMFILE))
				} else {
					Ready(Ok(fd))
				}
			})
		})
		.await
	}

	pub async fn dup_object2(
		&self,
		fd1: FileDescriptor,
		fd2: FileDescriptor,
	) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());
				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				let obj = guard.get(&fd1).cloned().ok_or(io::Error::EBADF)?;

				if guard.try_insert(fd2, obj).is_err() {
					Ready(Err(io::Error::EMFILE))
				} else {
					Ready(Ok(fd2))
				}
			})
		})
		.await
	}

	/// Removes the IO interface identified by the given file descriptor.
	pub async fn remove_object(&self, fd: FileDescriptor) -> io::Result<Arc<dyn ObjectInterface>> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());
				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				Ready(guard.remove(&fd).ok_or(io::Error::EBADF))
			})
		})
		.await
	}

	#[inline]
	pub fn get_current_task_prio(&self) -> Priority {
		without_interrupts(|| self.current_task.borrow().prio)
	}

	/// Returns a reference to the priority bitmap of the ready queue
	#[allow(dead_code)]
	#[inline]
	pub fn get_priority_bitmap(&self) -> &u64 {
		self.ready_queue.get_priority_bitmap()
	}

	#[cfg(target_arch = "x86_64")]
	pub fn set_current_kernel_stack(&self) {
		let current_task_borrowed = self.current_task.borrow();
		let tss = unsafe { &mut *CoreLocal::get().tss.get() };

		let rsp = current_task_borrowed.stacks.get_kernel_stack()
			+ current_task_borrowed.stacks.get_kernel_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64;
		tss.privilege_stack_table[0] = rsp.into();
		CoreLocal::get().kernel_stack.set(rsp.as_mut_ptr());
		let ist_start = current_task_borrowed.stacks.get_interrupt_stack()
			+ current_task_borrowed.stacks.get_interrupt_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64;
		tss.interrupt_stack_table[0] = ist_start.into();
	}

	pub fn set_current_task_priority(&mut self, prio: Priority) {
		without_interrupts(|| {
			trace!("Change priority of the current task");
			self.current_task.borrow_mut().prio = prio;
		});
	}

	pub fn set_priority(&mut self, id: TaskId, prio: Priority) -> Result<(), ()> {
		trace!("Change priority of task {id} to priority {prio}");

		without_interrupts(|| {
			let task = get_task_handle(id).ok_or(())?;
			#[cfg(feature = "smp")]
			let other_core = task.get_core_id() != self.core_id;
			#[cfg(not(feature = "smp"))]
			let other_core = false;

			if other_core {
				warn!("Have to change the priority on another core");
			} else if self.current_task.borrow().id == task.get_id() {
				self.current_task.borrow_mut().prio = prio;
			} else {
				self.ready_queue
					.set_priority(task, prio)
					.expect("Did not find a valid task in the ready queue");
			}

			Ok(())
		})
	}

	#[cfg(target_arch = "riscv64")]
	pub fn set_current_kernel_stack(&self) {
		let current_task_borrowed = self.current_task.borrow();

		let stack = (current_task_borrowed.stacks.get_kernel_stack()
			+ current_task_borrowed.stacks.get_kernel_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64)
			.as_u64();
		CoreLocal::get().kernel_stack.set(stack);
	}

	/// Save the FPU context of the current FPU owner and restore the context of the
	/// current task, which wants to use the FPU now.
	#[cfg(target_arch = "x86_64")]
	pub fn fpu_switch(&mut self) {
		if !Rc::ptr_eq(&self.current_task, &self.fpu_owner) {
			debug!(
				"Switching FPU owner from task {} to {}",
				self.fpu_owner.borrow().id,
				self.current_task.borrow().id
			);

			self.fpu_owner.borrow_mut().last_fpu_state.save();
			self.current_task.borrow().last_fpu_state.restore();
			self.fpu_owner = self.current_task.clone();
		}
	}

	/// Check if a finished task could be deleted.
	fn cleanup_tasks(&mut self) {
		// Pop the finished tasks from the queue and drop them, which implicitly deallocates all associated memory.
		while let Some(finished_task) = self.finished_tasks.pop_front() {
			debug!("Cleaning up task {}", finished_task.borrow().id);
		}
	}

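	/// Drains the input queues that other cores fill for this core: remotely woken
	/// tasks and newly spawned tasks are moved into the local ready queue.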
	#[cfg(all(any(target_arch = "x86_64", target_arch = "riscv64"), feature = "smp"))]
	pub fn check_input(&mut self) {
		let mut input_locked = CoreLocal::get().scheduler_input.lock();

		while let Some(task) = input_locked.wakeup_tasks.pop_front() {
			let task = self.blocked_tasks.custom_wakeup(task);
			self.ready_queue.push(task);
		}

		while let Some(new_task) = input_locked.new_tasks.pop_front() {
			let task = Rc::new(RefCell::new(Task::from(new_task)));
			self.ready_queue.push(task.clone());
		}
	}

	/// Only the idle task should call this function.
	/// Puts the core into a halt state if no other task is
	/// available.
	pub fn run() -> ! {
		let backoff = Backoff::new();

		loop {
			let core_scheduler = core_scheduler();
			interrupts::disable();

			// run async tasks
			crate::executor::run();

			// do housekeeping
			#[cfg(all(any(target_arch = "x86_64", target_arch = "riscv64"), feature = "smp"))]
			core_scheduler.check_input();
			core_scheduler.cleanup_tasks();

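			// If no task is runnable, first spin with exponential backoff and only halt
			// the core (wait for an interrupt) once the backoff is exhausted; the
			// spinning phase presumably keeps wakeup latency low for short idle periods.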
			if core_scheduler.ready_queue.is_empty() {
				if backoff.is_completed() {
					interrupts::enable_and_wait();
					backoff.reset();
				} else {
					interrupts::enable();
					backoff.snooze();
				}
			} else {
				interrupts::enable();
				core_scheduler.reschedule();
				backoff.reset();
			}
		}
	}

	#[inline]
	#[cfg(target_arch = "aarch64")]
	pub fn get_last_stack_pointer(&self) -> memory_addresses::VirtAddr {
		self.current_task.borrow().last_stack_pointer
	}

	/// Triggers the scheduler to reschedule the tasks.
	/// Interrupt flag must be cleared before calling this function.
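	/// Returns a pointer to the location where the stack pointer of the previous task
	/// has to be stored, or `None` if no task switch is necessary. (On `riscv64` the
	/// context switch is performed inside this function instead.)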
	pub fn scheduler(&mut self) -> Option<*mut usize> {
		// run background tasks
		crate::executor::run();

		// Someone wants to give up the CPU
		// => we have time to clean up the system
		self.cleanup_tasks();

		// Get information about the current task.
		let (id, last_stack_pointer, prio, status) = {
			let mut borrowed = self.current_task.borrow_mut();
			(
				borrowed.id,
				ptr::from_mut(&mut borrowed.last_stack_pointer).cast::<usize>(),
				borrowed.prio,
				borrowed.status,
			)
		};

		let mut new_task = None;

		if status == TaskStatus::Running {
			// A task is currently running.
			// Check if a task with an equal or higher priority is available.
			if let Some(task) = self.ready_queue.pop_with_prio(prio) {
				new_task = Some(task);
			}
		} else {
			if status == TaskStatus::Finished {
				// Mark the finished task as invalid and add it to the finished tasks for a later cleanup.
				self.current_task.borrow_mut().status = TaskStatus::Invalid;
				self.finished_tasks.push_back(self.current_task.clone());
			}

			// No task is currently running.
			// Check if there is any available task and get the one with the highest priority.
			if let Some(task) = self.ready_queue.pop() {
				// This available task becomes the new task.
				debug!("Task is available.");
				new_task = Some(task);
			} else if status != TaskStatus::Idle {
				// The Idle task becomes the new task.
				debug!("Only Idle Task is available.");
				new_task = Some(self.idle_task.clone());
			}
		}

		if let Some(task) = new_task {
			// There is a new task we want to switch to.

			// Handle the current task.
			if status == TaskStatus::Running {
				// Mark the running task as ready again and add it back to the queue.
				self.current_task.borrow_mut().status = TaskStatus::Ready;
				self.ready_queue.push(self.current_task.clone());
			}

			// Handle the new task and get information about it.
			let (new_id, new_stack_pointer) = {
				let mut borrowed = task.borrow_mut();
				if borrowed.status != TaskStatus::Idle {
					// Mark the new task as running.
					borrowed.status = TaskStatus::Running;
				}

				(borrowed.id, borrowed.last_stack_pointer)
			};

			if id != new_id {
				// Tell the scheduler about the new task.
				debug!(
					"Switching task from {} to {} (stack {:#X} => {:p})",
					id,
					new_id,
					unsafe { *last_stack_pointer },
					new_stack_pointer
				);
				#[cfg(not(target_arch = "riscv64"))]
				{
					self.current_task = task;
				}

				// Finally return the context of the new task.
				#[cfg(not(target_arch = "riscv64"))]
				return Some(last_stack_pointer);

				#[cfg(target_arch = "riscv64")]
				{
					if sstatus::read().fs() == sstatus::FS::Dirty {
						self.current_task.borrow_mut().last_fpu_state.save();
					}
					task.borrow().last_fpu_state.restore();
					self.current_task = task;
					unsafe {
						switch_to_task(last_stack_pointer, new_stack_pointer.as_usize());
					}
				}
			}
		}

		None
	}
}

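/// Returns a task ID that is not yet in use: the global counter is advanced until an
/// ID is found that is not present in `TASKS`.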
fn get_tid() -> TaskId {
	static TID_COUNTER: AtomicI32 = AtomicI32::new(0);
	let guard = TASKS.lock();

	loop {
		let id = TaskId::from(TID_COUNTER.fetch_add(1, Ordering::SeqCst));
		if !guard.contains_key(&id) {
			return id;
		}
	}
}

#[inline]
pub(crate) fn abort() -> ! {
	core_scheduler().exit(-1)
}

/// Add a per-core scheduler for the current core.
pub(crate) fn add_current_core() {
	// Create an idle task for this core.
	let core_id = core_id();
	let tid = get_tid();
	let idle_task = Rc::new(RefCell::new(Task::new_idle(tid, core_id)));

	// Add the ID -> Task mapping.
	WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
	TASKS.lock().insert(
		tid,
		TaskHandle::new(
			tid,
			IDLE_PRIO,
			#[cfg(feature = "smp")]
			core_id,
		),
	);
	// Initialize a scheduler for this core.
	debug!("Initializing scheduler for core {core_id} with idle task {tid}");
	let boxed_scheduler = Box::new(PerCoreScheduler {
		#[cfg(feature = "smp")]
		core_id,
		current_task: idle_task.clone(),
		#[cfg(target_arch = "x86_64")]
		fpu_owner: idle_task.clone(),
		idle_task,
		ready_queue: PriorityTaskQueue::new(),
		finished_tasks: VecDeque::new(),
		blocked_tasks: BlockedTaskQueue::new(),
	});

	let scheduler = Box::into_raw(boxed_scheduler);
	set_core_scheduler(scheduler);
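	// Register this core's scheduler input queue (and, on x86_64, its halt flag) so
	// that other cores can push new tasks and wakeups to this core.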
	#[cfg(feature = "smp")]
	{
		SCHEDULER_INPUTS.lock().insert(
			core_id.try_into().unwrap(),
			&CoreLocal::get().scheduler_input,
		);
		#[cfg(target_arch = "x86_64")]
		CORE_HLT_STATE
			.lock()
			.insert(core_id.try_into().unwrap(), &CoreLocal::get().hlt);
	}
}

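/// Atomically reads and clears the halt flag of the given core.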
#[inline]
#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))]
pub(crate) fn take_core_hlt_state(core_id: CoreId) -> bool {
	CORE_HLT_STATE.lock()[usize::try_from(core_id).unwrap()].swap(false, Ordering::Acquire)
}

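/// Returns the scheduler input queue of the core identified by `core_id`.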
#[inline]
#[cfg(feature = "smp")]
fn get_scheduler_input(core_id: CoreId) -> &'static InterruptTicketMutex<SchedulerInput> {
	SCHEDULER_INPUTS.lock()[usize::try_from(core_id).unwrap()]
}

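/// Spawns a new task with the given priority and stack size. A negative `selector`
/// lets the scheduler pick the target core round-robin; otherwise `selector` is used
/// as the core ID.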
pub unsafe fn spawn(
	func: unsafe extern "C" fn(usize),
	arg: usize,
	prio: Priority,
	stack_size: usize,
	selector: isize,
) -> TaskId {
	static CORE_COUNTER: AtomicU32 = AtomicU32::new(1);

	let core_id = if selector < 0 {
		// use Round Robin to schedule the cores
		CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count()
	} else {
		selector as u32
	};

	unsafe { PerCoreScheduler::spawn(func, arg, prio, core_id, stack_size) }
}

#[allow(clippy::result_unit_err)]
pub fn join(id: TaskId) -> Result<(), ()> {
	let core_scheduler = core_scheduler();

	debug!(
		"Task {} is waiting for task {}",
		core_scheduler.get_current_task_id(),
		id
	);

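	// As long as the target task still has an entry in WAITING_TASKS it has not
	// finished yet: enqueue the current task, block it and reschedule. Once exit()
	// has removed the entry, the join is complete.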
	loop {
		let mut waiting_tasks_guard = WAITING_TASKS.lock();

		if let Some(queue) = waiting_tasks_guard.get_mut(&id) {
			queue.push_back(core_scheduler.get_current_task_handle());
			core_scheduler.block_current_task(None);

			// Switch to the next task.
			drop(waiting_tasks_guard);
			core_scheduler.reschedule();
		} else {
			return Ok(());
		}
	}
}

pub fn shutdown(arg: i32) -> ! {
	crate::syscalls::shutdown(arg)
}

fn get_task_handle(id: TaskId) -> Option<TaskHandle> {
	TASKS.lock().get(&id).copied()
}

#[cfg(all(target_arch = "x86_64", feature = "common-os"))]
pub(crate) static BOOT_ROOT_PAGE_TABLE: OnceCell<usize> = OnceCell::new();

#[cfg(all(target_arch = "x86_64", feature = "common-os"))]
pub(crate) fn get_root_page_table() -> usize {
	let current_task_borrowed = core_scheduler().current_task.borrow_mut();
	current_task_borrowed.root_page_table
}