hermit/scheduler/
mod.rs

#![allow(clippy::type_complexity)]

use alloc::boxed::Box;
use alloc::collections::{BTreeMap, VecDeque};
use alloc::rc::Rc;
use alloc::sync::Arc;
#[cfg(feature = "smp")]
use alloc::vec::Vec;
use core::cell::RefCell;
use core::future::{self, Future};
use core::ptr;
#[cfg(all(target_arch = "x86_64", feature = "smp"))]
use core::sync::atomic::AtomicBool;
use core::sync::atomic::{AtomicI32, AtomicU32, Ordering};
use core::task::Poll::Ready;
use core::task::ready;

use ahash::RandomState;
use crossbeam_utils::Backoff;
use hashbrown::HashMap;
use hermit_sync::*;
#[cfg(target_arch = "riscv64")]
use riscv::register::sstatus;

use crate::arch::core_local::*;
#[cfg(target_arch = "riscv64")]
use crate::arch::switch::switch_to_task;
#[cfg(target_arch = "x86_64")]
use crate::arch::switch::{switch_to_fpu_owner, switch_to_task};
use crate::arch::{get_processor_count, interrupts};
use crate::errno::Errno;
use crate::fd::{FileDescriptor, ObjectInterface};
use crate::kernel::scheduler::TaskStacks;
use crate::scheduler::task::*;
use crate::{arch, io};

pub mod task;

static NO_TASKS: AtomicU32 = AtomicU32::new(0);
/// Map between core ID and the input queues of the per-core scheduler
#[cfg(feature = "smp")]
static SCHEDULER_INPUTS: SpinMutex<Vec<&InterruptTicketMutex<SchedulerInput>>> =
	SpinMutex::new(Vec::new());
#[cfg(all(target_arch = "x86_64", feature = "smp"))]
static CORE_HLT_STATE: SpinMutex<Vec<&AtomicBool>> = SpinMutex::new(Vec::new());
/// Map between task ID and queue of waiting tasks
static WAITING_TASKS: InterruptTicketMutex<BTreeMap<TaskId, VecDeque<TaskHandle>>> =
	InterruptTicketMutex::new(BTreeMap::new());
/// Map between task ID and task handle
static TASKS: InterruptTicketMutex<BTreeMap<TaskId, TaskHandle>> =
	InterruptTicketMutex::new(BTreeMap::new());

/// Unique identifier for a core.
pub type CoreId = u32;

#[cfg(feature = "smp")]
pub(crate) struct SchedulerInput {
	/// Queue of new tasks
	new_tasks: VecDeque<NewTask>,
	/// Queue of tasks that have been woken up by another core
	wakeup_tasks: VecDeque<TaskHandle>,
}

#[cfg(feature = "smp")]
impl SchedulerInput {
	pub fn new() -> Self {
		Self {
			new_tasks: VecDeque::new(),
			wakeup_tasks: VecDeque::new(),
		}
	}
}

#[cfg_attr(any(target_arch = "x86_64", target_arch = "aarch64"), repr(align(128)))]
#[cfg_attr(
	not(any(target_arch = "x86_64", target_arch = "aarch64")),
	repr(align(64))
)]
pub(crate) struct PerCoreScheduler {
	/// Core ID of this per-core scheduler
	#[cfg(feature = "smp")]
	core_id: CoreId,
	/// Task which is currently running
	current_task: Rc<RefCell<Task>>,
	/// Idle Task
	idle_task: Rc<RefCell<Task>>,
	/// Task that currently owns the FPU
	#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
	fpu_owner: Rc<RefCell<Task>>,
	/// Queue of tasks that are ready
	ready_queue: PriorityTaskQueue,
	/// Queue of tasks that are finished and can be released
	finished_tasks: VecDeque<Rc<RefCell<Task>>>,
	/// Queue of blocked tasks, sorted by wakeup time.
	blocked_tasks: BlockedTaskQueue,
}

pub(crate) trait PerCoreSchedulerExt {
	/// Triggers the scheduler to reschedule the tasks.
	/// The interrupt flag will be cleared during the reschedule.
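	///
	/// Illustrative sketch (not taken from a real call site): the per-core
	/// scheduler is obtained via `core_scheduler()` and asked to pick the
	/// next task, e.g. after waking up tasks or changing priorities.
	///
	/// ```ignore
	/// core_scheduler().reschedule();
	/// ```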
	fn reschedule(self);

	#[cfg(any(feature = "tcp", feature = "udp"))]
	fn add_network_timer(self, wakeup_time: Option<u64>);

	/// Terminate the current task on the current core.
	fn exit(self, exit_code: i32) -> !;
}

impl PerCoreSchedulerExt for &mut PerCoreScheduler {
	#[cfg(target_arch = "x86_64")]
	fn reschedule(self) {
		without_interrupts(|| {
			if let Some(last_stack_pointer) = self.scheduler() {
				let (new_stack_pointer, is_idle) = {
					let borrowed = self.current_task.borrow();
					(
						borrowed.last_stack_pointer,
						borrowed.status == TaskStatus::Idle,
					)
				};

				if is_idle || Rc::ptr_eq(&self.current_task, &self.fpu_owner) {
					unsafe {
						switch_to_fpu_owner(
							last_stack_pointer,
							new_stack_pointer.as_u64() as usize,
						);
					}
				} else {
					unsafe {
						switch_to_task(last_stack_pointer, new_stack_pointer.as_u64() as usize);
					}
				}
			}
		});
	}

	/// Trigger an interrupt to reschedule the system.
	#[cfg(target_arch = "aarch64")]
	fn reschedule(self) {
		use core::arch::asm;

		use arm_gic::IntId;
		use arm_gic::gicv3::{GicV3, SgiTarget, SgiTargetGroup};

		use crate::interrupts::SGI_RESCHED;

		unsafe {
			asm!("dsb nsh", "isb", options(nostack, nomem, preserves_flags));
		}

		let reschedid = IntId::sgi(SGI_RESCHED.into());
		#[cfg(feature = "smp")]
		let core_id = self.core_id;
		#[cfg(not(feature = "smp"))]
		let core_id = 0;

		GicV3::send_sgi(
			reschedid,
			SgiTarget::List {
				affinity3: 0,
				affinity2: 0,
				affinity1: 0,
				target_list: 1 << core_id,
			},
			SgiTargetGroup::CurrentGroup1,
		);

		interrupts::enable();
	}

	#[cfg(target_arch = "riscv64")]
	fn reschedule(self) {
		without_interrupts(|| self.scheduler());
	}

	#[cfg(any(feature = "tcp", feature = "udp"))]
	fn add_network_timer(self, wakeup_time: Option<u64>) {
		without_interrupts(|| {
			self.blocked_tasks.add_network_timer(wakeup_time);
		});
	}

	fn exit(self, exit_code: i32) -> ! {
		without_interrupts(|| {
			// Get the current task.
			let mut current_task_borrowed = self.current_task.borrow_mut();
			assert_ne!(
				current_task_borrowed.status,
				TaskStatus::Idle,
				"Trying to terminate the idle task"
			);

			// Finish the task and reschedule.
			debug!(
				"Finishing task {} with exit code {}",
				current_task_borrowed.id, exit_code
			);
			current_task_borrowed.status = TaskStatus::Finished;
			NO_TASKS.fetch_sub(1, Ordering::SeqCst);

			let current_id = current_task_borrowed.id;
			drop(current_task_borrowed);

			// Wake up all tasks that are waiting for the task with this identifier.
			if let Some(mut queue) = WAITING_TASKS.lock().remove(&current_id) {
				while let Some(task) = queue.pop_front() {
					self.custom_wakeup(task);
				}
			}
		});

		self.reschedule();
		unreachable!()
	}
}

struct NewTask {
	tid: TaskId,
	func: unsafe extern "C" fn(usize),
	arg: usize,
	prio: Priority,
	core_id: CoreId,
	stacks: TaskStacks,
	object_map:
		Arc<async_lock::RwLock<HashMap<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>>>,
}

impl From<NewTask> for Task {
	fn from(value: NewTask) -> Self {
		let NewTask {
			tid,
			func,
			arg,
			prio,
			core_id,
			stacks,
			object_map,
		} = value;
		let mut task = Self::new(tid, core_id, TaskStatus::Ready, prio, stacks, object_map);
		task.create_stack_frame(func, arg);
		task
	}
}

impl PerCoreScheduler {
	/// Spawn a new task.
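	///
	/// Illustrative sketch of a call site; the entry function, the target
	/// core `0`, and the stack size of 32 KiB are made up for this example:
	///
	/// ```ignore
	/// unsafe extern "C" fn entry(arg: usize) { /* task body */ }
	///
	/// let prio = core_scheduler().get_current_task_prio();
	/// let tid = unsafe { PerCoreScheduler::spawn(entry, 0, prio, 0, 32 * 1024) };
	/// ```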
	pub unsafe fn spawn(
		func: unsafe extern "C" fn(usize),
		arg: usize,
		prio: Priority,
		core_id: CoreId,
		stack_size: usize,
	) -> TaskId {
		// Create the new task.
		let tid = get_tid();
		let stacks = TaskStacks::new(stack_size);
		let new_task = NewTask {
			tid,
			func,
			arg,
			prio,
			core_id,
			stacks,
			object_map: core_scheduler().get_current_task_object_map(),
		};

		// Add it to the task lists.
		let wakeup = {
			#[cfg(feature = "smp")]
			let mut input_locked = get_scheduler_input(core_id).lock();
			WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
			TASKS.lock().insert(
				tid,
				TaskHandle::new(
					tid,
					prio,
					#[cfg(feature = "smp")]
					core_id,
				),
			);
			NO_TASKS.fetch_add(1, Ordering::SeqCst);

			#[cfg(feature = "smp")]
			if core_id == core_scheduler().core_id {
				let task = Rc::new(RefCell::new(Task::from(new_task)));
				core_scheduler().ready_queue.push(task);
				false
			} else {
				input_locked.new_tasks.push_back(new_task);
				true
			}
			#[cfg(not(feature = "smp"))]
			if core_id == 0 {
				let task = Rc::new(RefCell::new(Task::from(new_task)));
				core_scheduler().ready_queue.push(task);
				false
			} else {
				panic!("Invalid core_id {}!", core_id)
			}
		};

		debug!("Creating task {tid} with priority {prio} on core {core_id}");

		if wakeup {
			arch::wakeup_core(core_id);
		}

		tid
	}

	#[cfg(feature = "newlib")]
	fn clone_impl(&self, func: extern "C" fn(usize), arg: usize) -> TaskId {
		static NEXT_CORE_ID: AtomicU32 = AtomicU32::new(1);

		// Get the Core ID of the next CPU.
		let core_id: CoreId = {
			// Increase the CPU number by 1.
			let id = NEXT_CORE_ID.fetch_add(1, Ordering::SeqCst);

			// Wrap around to core 0 when the processor count is reached.
			if id == arch::get_processor_count() {
				NEXT_CORE_ID.store(0, Ordering::SeqCst);
				0
			} else {
				id
			}
		};

		// Get the current task.
		let current_task_borrowed = self.current_task.borrow();

		// Clone the current task.
		let tid = get_tid();
		let clone_task = NewTask {
			tid,
			func,
			arg,
			prio: current_task_borrowed.prio,
			core_id,
			stacks: TaskStacks::new(current_task_borrowed.stacks.get_user_stack_size()),
			object_map: current_task_borrowed.object_map.clone(),
		};

		// Add it to the task lists.
		let wakeup = {
			#[cfg(feature = "smp")]
			let mut input_locked = get_scheduler_input(core_id).lock();
			WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
			TASKS.lock().insert(
				tid,
				TaskHandle::new(
					tid,
					current_task_borrowed.prio,
					#[cfg(feature = "smp")]
					core_id,
				),
			);
			NO_TASKS.fetch_add(1, Ordering::SeqCst);
			#[cfg(feature = "smp")]
			if core_id == core_scheduler().core_id {
				let clone_task = Rc::new(RefCell::new(Task::from(clone_task)));
				core_scheduler().ready_queue.push(clone_task);
				false
			} else {
				input_locked.new_tasks.push_back(clone_task);
				true
			}
			#[cfg(not(feature = "smp"))]
			if core_id == 0 {
				let clone_task = Rc::new(RefCell::new(Task::from(clone_task)));
				core_scheduler().ready_queue.push(clone_task);
				false
			} else {
				panic!("Invalid core_id {}!", core_id);
			}
		};

		// Wake up the CPU
		if wakeup {
			arch::wakeup_core(core_id);
		}

		tid
	}

	#[cfg(feature = "newlib")]
	pub fn clone(&self, func: extern "C" fn(usize), arg: usize) -> TaskId {
		without_interrupts(|| self.clone_impl(func, arg))
	}

	/// Returns `true` if a reschedule is required
	#[inline]
	#[cfg(all(any(target_arch = "x86_64", target_arch = "riscv64"), feature = "smp"))]
	pub fn is_scheduling(&self) -> bool {
		self.current_task.borrow().prio < self.ready_queue.get_highest_priority()
	}

	#[inline]
	pub fn handle_waiting_tasks(&mut self) {
		without_interrupts(|| {
			crate::executor::run();
			self.blocked_tasks
				.handle_waiting_tasks(&mut self.ready_queue);
		});
	}

	#[cfg(not(feature = "smp"))]
	pub fn custom_wakeup(&mut self, task: TaskHandle) {
		without_interrupts(|| {
			let task = self.blocked_tasks.custom_wakeup(task);
			self.ready_queue.push(task);
		});
	}

	#[cfg(feature = "smp")]
	pub fn custom_wakeup(&mut self, task: TaskHandle) {
		if task.get_core_id() == self.core_id {
			without_interrupts(|| {
				let task = self.blocked_tasks.custom_wakeup(task);
				self.ready_queue.push(task);
			});
		} else {
			get_scheduler_input(task.get_core_id())
				.lock()
				.wakeup_tasks
				.push_back(task);
			// Wake up the CPU
			arch::wakeup_core(task.get_core_id());
		}
	}

	#[inline]
	pub fn block_current_task(&mut self, wakeup_time: Option<u64>) {
		without_interrupts(|| {
			self.blocked_tasks
				.add(self.current_task.clone(), wakeup_time);
		});
	}

	#[inline]
	pub fn get_current_task_handle(&self) -> TaskHandle {
		without_interrupts(|| {
			let current_task_borrowed = self.current_task.borrow();

			TaskHandle::new(
				current_task_borrowed.id,
				current_task_borrowed.prio,
				#[cfg(feature = "smp")]
				current_task_borrowed.core_id,
			)
		})
	}

	#[inline]
	pub fn get_current_task_id(&self) -> TaskId {
		without_interrupts(|| self.current_task.borrow().id)
	}

	#[inline]
	pub fn get_current_task_object_map(
		&self,
	) -> Arc<async_lock::RwLock<HashMap<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>>> {
		without_interrupts(|| self.current_task.borrow().object_map.clone())
	}

	/// Maps a file descriptor to its IO interface and returns
	/// the shared reference.
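	///
	/// Illustrative sketch, assuming an async context and a file descriptor
	/// `fd` that was previously returned by `insert_object`:
	///
	/// ```ignore
	/// let obj = core_scheduler().get_object(fd).await?;
	/// ```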
	#[inline]
	pub async fn get_object(&self, fd: FileDescriptor) -> io::Result<Arc<dyn ObjectInterface>> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.read());

				let guard = ready!(pinned_obj.as_mut().poll(cx));
				Ready(guard.get(&fd).cloned().ok_or(Errno::Badf))
			})
		})
		.await
	}

	/// Creates a new map between file descriptors and their IO interfaces and
	/// clones the standard descriptors.
	#[allow(dead_code)]
	pub async fn recreate_objmap(&self) -> io::Result<()> {
		let mut map = HashMap::<FileDescriptor, Arc<dyn ObjectInterface>, RandomState>::with_hasher(
			RandomState::with_seeds(0, 0, 0, 0),
		);

		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.read());

				let guard = ready!(pinned_obj.as_mut().poll(cx));
				// clone standard file descriptors
				for i in 0..3 {
					if let Some(obj) = guard.get(&i) {
						map.insert(i, obj.clone());
					}
				}

				Ready(io::Result::Ok(()))
			})
		})
		.await?;

		without_interrupts(|| {
			self.current_task.borrow_mut().object_map = Arc::new(async_lock::RwLock::new(map));
		});

		Ok(())
	}

	/// Inserts a new IO interface and returns a file descriptor as an
	/// identifier for this object.
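	///
	/// The lowest unused file descriptor is chosen. Illustrative sketch,
	/// assuming `obj: Arc<dyn ObjectInterface>` was created elsewhere:
	///
	/// ```ignore
	/// let fd = core_scheduler().insert_object(obj).await?;
	/// ```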
	pub async fn insert_object(&self, obj: Arc<dyn ObjectInterface>) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());

				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				let new_fd = || -> io::Result<FileDescriptor> {
					let mut fd: FileDescriptor = 0;
					loop {
						if !guard.contains_key(&fd) {
							break Ok(fd);
						} else if fd == FileDescriptor::MAX {
							break Err(Errno::Overflow);
						}

						fd = fd.saturating_add(1);
					}
				};

				let fd = new_fd()?;
				let _ = guard.insert(fd, obj.clone());
				Ready(Ok(fd))
			})
		})
		.await
	}

	/// Duplicates an IO interface and returns a new file descriptor as an
	/// identifier for the new copy.
	pub async fn dup_object(&self, fd: FileDescriptor) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());

				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				let obj = (*(guard.get(&fd).ok_or(Errno::Inval)?)).clone();

				let new_fd = || -> io::Result<FileDescriptor> {
					let mut fd: FileDescriptor = 0;
					loop {
						if !guard.contains_key(&fd) {
							break Ok(fd);
						} else if fd == FileDescriptor::MAX {
							break Err(Errno::Overflow);
						}

						fd = fd.saturating_add(1);
					}
				};

				let fd = new_fd()?;
				if guard.try_insert(fd, obj).is_err() {
					Ready(Err(Errno::Mfile))
				} else {
					Ready(Ok(fd))
				}
			})
		})
		.await
	}

	pub async fn dup_object2(
		&self,
		fd1: FileDescriptor,
		fd2: FileDescriptor,
	) -> io::Result<FileDescriptor> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());
				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				let obj = guard.get(&fd1).cloned().ok_or(Errno::Badf)?;

				if guard.try_insert(fd2, obj).is_err() {
					Ready(Err(Errno::Mfile))
				} else {
					Ready(Ok(fd2))
				}
			})
		})
		.await
	}

	/// Removes an IO interface that is identified by the file descriptor.
	pub async fn remove_object(&self, fd: FileDescriptor) -> io::Result<Arc<dyn ObjectInterface>> {
		future::poll_fn(|cx| {
			without_interrupts(|| {
				let borrowed = self.current_task.borrow();
				let mut pinned_obj = core::pin::pin!(borrowed.object_map.write());
				let mut guard = ready!(pinned_obj.as_mut().poll(cx));
				Ready(guard.remove(&fd).ok_or(Errno::Badf))
			})
		})
		.await
	}

	#[inline]
	pub fn get_current_task_prio(&self) -> Priority {
		without_interrupts(|| self.current_task.borrow().prio)
	}

	/// Returns a reference to the priority bitmap.
	#[allow(dead_code)]
	#[inline]
	pub fn get_priority_bitmap(&self) -> &u64 {
		self.ready_queue.get_priority_bitmap()
	}

	#[cfg(target_arch = "x86_64")]
	pub fn set_current_kernel_stack(&self) {
		let current_task_borrowed = self.current_task.borrow();
		let tss = unsafe { &mut *CoreLocal::get().tss.get() };

		let rsp = current_task_borrowed.stacks.get_kernel_stack()
			+ current_task_borrowed.stacks.get_kernel_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64;
		tss.privilege_stack_table[0] = rsp.into();
		CoreLocal::get().kernel_stack.set(rsp.as_mut_ptr());
		let ist_start = current_task_borrowed.stacks.get_interrupt_stack()
			+ current_task_borrowed.stacks.get_interrupt_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64;
		tss.interrupt_stack_table[0] = ist_start.into();
	}

	pub fn set_current_task_priority(&mut self, prio: Priority) {
		without_interrupts(|| {
			trace!("Change priority of the current task");
			self.current_task.borrow_mut().prio = prio;
		});
	}

	pub fn set_priority(&mut self, id: TaskId, prio: Priority) -> Result<(), ()> {
		trace!("Change priority of task {id} to priority {prio}");

		without_interrupts(|| {
			let task = get_task_handle(id).ok_or(())?;
			#[cfg(feature = "smp")]
			let other_core = task.get_core_id() != self.core_id;
			#[cfg(not(feature = "smp"))]
			let other_core = false;

			if other_core {
				warn!("Have to change the priority on another core");
			} else if self.current_task.borrow().id == task.get_id() {
				self.current_task.borrow_mut().prio = prio;
			} else {
				self.ready_queue
					.set_priority(task, prio)
					.expect("Did not find a valid task in the ready queue");
			}

			Ok(())
		})
	}

	#[cfg(target_arch = "riscv64")]
	pub fn set_current_kernel_stack(&self) {
		let current_task_borrowed = self.current_task.borrow();

		let stack = (current_task_borrowed.stacks.get_kernel_stack()
			+ current_task_borrowed.stacks.get_kernel_stack_size() as u64
			- TaskStacks::MARKER_SIZE as u64)
			.as_u64();
		CoreLocal::get().kernel_stack.set(stack);
	}

	/// Saves the FPU context of the current FPU owner and restores the context of the
	/// current task, which wants to use the FPU now.
	#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
	pub fn fpu_switch(&mut self) {
		if !Rc::ptr_eq(&self.current_task, &self.fpu_owner) {
			debug!(
				"Switching FPU owner from task {} to {}",
				self.fpu_owner.borrow().id,
				self.current_task.borrow().id
			);

			self.fpu_owner.borrow_mut().last_fpu_state.save();
			self.current_task.borrow().last_fpu_state.restore();
			self.fpu_owner = self.current_task.clone();
		}
	}

	/// Checks whether finished tasks can be released and drops them.
	fn cleanup_tasks(&mut self) {
		// Pop the finished tasks and drop them, which implicitly deallocates all associated memory.
		while let Some(finished_task) = self.finished_tasks.pop_front() {
			debug!("Cleaning up task {}", finished_task.borrow().id);
		}
	}

	#[cfg(feature = "smp")]
	pub fn check_input(&mut self) {
		let mut input_locked = CoreLocal::get().scheduler_input.lock();

		while let Some(task) = input_locked.wakeup_tasks.pop_front() {
			let task = self.blocked_tasks.custom_wakeup(task);
			self.ready_queue.push(task);
		}

		while let Some(new_task) = input_locked.new_tasks.pop_front() {
			let task = Rc::new(RefCell::new(Task::from(new_task)));
			self.ready_queue.push(task.clone());
		}
	}

	/// Only the idle task should call this function.
	/// Puts the core into a halt state if no other task is available.
	pub fn run() -> ! {
		let backoff = Backoff::new();

		loop {
			let core_scheduler = core_scheduler();
			interrupts::disable();

			// run async tasks
			crate::executor::run();

			// do housekeeping
			#[cfg(feature = "smp")]
			core_scheduler.check_input();
			core_scheduler.cleanup_tasks();

			if core_scheduler.ready_queue.is_empty() {
				if backoff.is_completed() {
					interrupts::enable_and_wait();
					backoff.reset();
				} else {
					interrupts::enable();
					backoff.snooze();
				}
			} else {
				interrupts::enable();
				core_scheduler.reschedule();
				backoff.reset();
			}
		}
	}

	#[inline]
	#[cfg(target_arch = "aarch64")]
	pub fn get_last_stack_pointer(&self) -> memory_addresses::VirtAddr {
		self.current_task.borrow().last_stack_pointer
	}

	/// Triggers the scheduler to reschedule the tasks.
	/// The interrupt flag must be cleared before calling this function.
	pub fn scheduler(&mut self) -> Option<*mut usize> {
		// run background tasks
		crate::executor::run();

		// Someone wants to give up the CPU
		// => we have time to clean up the system
		self.cleanup_tasks();

		// Get information about the current task.
		let (id, last_stack_pointer, prio, status) = {
			let mut borrowed = self.current_task.borrow_mut();
			(
				borrowed.id,
				ptr::from_mut(&mut borrowed.last_stack_pointer).cast::<usize>(),
				borrowed.prio,
				borrowed.status,
			)
		};

		let mut new_task = None;

		if status == TaskStatus::Running {
			// A task is currently running.
			// Check if a task with an equal or higher priority is available.
			if let Some(task) = self.ready_queue.pop_with_prio(prio) {
				new_task = Some(task);
			}
		} else {
			if status == TaskStatus::Finished {
				// Mark the finished task as invalid and add it to the finished tasks for a later cleanup.
				self.current_task.borrow_mut().status = TaskStatus::Invalid;
				self.finished_tasks.push_back(self.current_task.clone());
			}

			// No task is currently running.
			// Check if there is any available task and get the one with the highest priority.
			if let Some(task) = self.ready_queue.pop() {
				// This available task becomes the new task.
				debug!("Task is available.");
				new_task = Some(task);
			} else if status != TaskStatus::Idle {
				// The Idle task becomes the new task.
				debug!("Only Idle Task is available.");
				new_task = Some(self.idle_task.clone());
			}
		}

		if let Some(task) = new_task {
			// There is a new task we want to switch to.

			// Handle the current task.
			if status == TaskStatus::Running {
				// Mark the running task as ready again and add it back to the queue.
				self.current_task.borrow_mut().status = TaskStatus::Ready;
				self.ready_queue.push(self.current_task.clone());
			}

			// Handle the new task and get information about it.
			let (new_id, new_stack_pointer) = {
				let mut borrowed = task.borrow_mut();
				if borrowed.status != TaskStatus::Idle {
					// Mark the new task as running.
					borrowed.status = TaskStatus::Running;
				}

				(borrowed.id, borrowed.last_stack_pointer)
			};

			if id != new_id {
				// Tell the scheduler about the new task.
				debug!(
					"Switching task from {} to {} (stack {:#X} => {:p})",
					id,
					new_id,
					unsafe { *last_stack_pointer },
					new_stack_pointer
				);
				#[cfg(not(target_arch = "riscv64"))]
				{
					self.current_task = task;
				}

				// Finally return the context of the new task.
				#[cfg(not(target_arch = "riscv64"))]
				return Some(last_stack_pointer);

				#[cfg(target_arch = "riscv64")]
				{
					if sstatus::read().fs() == sstatus::FS::Dirty {
						self.current_task.borrow_mut().last_fpu_state.save();
					}
					task.borrow().last_fpu_state.restore();
					self.current_task = task;
					unsafe {
						switch_to_task(last_stack_pointer, new_stack_pointer.as_usize());
					}
				}
			}
		}

		None
	}
}

fn get_tid() -> TaskId {
	static TID_COUNTER: AtomicI32 = AtomicI32::new(0);
	let guard = TASKS.lock();

	loop {
		let id = TaskId::from(TID_COUNTER.fetch_add(1, Ordering::SeqCst));
		if !guard.contains_key(&id) {
			return id;
		}
	}
}

#[inline]
pub(crate) fn abort() -> ! {
	core_scheduler().exit(-1)
}

/// Add a per-core scheduler for the current core.
pub(crate) fn add_current_core() {
	// Create an idle task for this core.
	let core_id = core_id();
	let tid = get_tid();
	let idle_task = Rc::new(RefCell::new(Task::new_idle(tid, core_id)));

	// Add the ID -> Task mapping.
	WAITING_TASKS.lock().insert(tid, VecDeque::with_capacity(1));
	TASKS.lock().insert(
		tid,
		TaskHandle::new(
			tid,
			IDLE_PRIO,
			#[cfg(feature = "smp")]
			core_id,
		),
	);
	// Initialize a scheduler for this core.
	debug!("Initializing scheduler for core {core_id} with idle task {tid}");
	let boxed_scheduler = Box::new(PerCoreScheduler {
		#[cfg(feature = "smp")]
		core_id,
		current_task: idle_task.clone(),
		#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
		fpu_owner: idle_task.clone(),
		idle_task,
		ready_queue: PriorityTaskQueue::new(),
		finished_tasks: VecDeque::new(),
		blocked_tasks: BlockedTaskQueue::new(),
	});

	let scheduler = Box::into_raw(boxed_scheduler);
	set_core_scheduler(scheduler);
	#[cfg(feature = "smp")]
	{
		SCHEDULER_INPUTS.lock().insert(
			core_id.try_into().unwrap(),
			&CoreLocal::get().scheduler_input,
		);
		#[cfg(target_arch = "x86_64")]
		CORE_HLT_STATE
			.lock()
			.insert(core_id.try_into().unwrap(), &CoreLocal::get().hlt);
	}
}

#[inline]
#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))]
pub(crate) fn take_core_hlt_state(core_id: CoreId) -> bool {
	CORE_HLT_STATE.lock()[usize::try_from(core_id).unwrap()].swap(false, Ordering::Acquire)
}

#[inline]
#[cfg(feature = "smp")]
fn get_scheduler_input(core_id: CoreId) -> &'static InterruptTicketMutex<SchedulerInput> {
	SCHEDULER_INPUTS.lock()[usize::try_from(core_id).unwrap()]
}

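/// Spawns a new task with the given priority and stack size.
///
/// A negative `selector` distributes tasks over the cores in a round-robin
/// fashion; a non-negative `selector` is used as the target core ID.
///
/// Illustrative usage sketch; the entry function and the stack size of
/// 32 KiB are made up for this example:
///
/// ```ignore
/// unsafe extern "C" fn entry(arg: usize) { /* task body */ }
///
/// let prio = core_scheduler().get_current_task_prio();
/// let tid = unsafe { spawn(entry, 0, prio, 32 * 1024, -1) };
/// ```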
pub unsafe fn spawn(
	func: unsafe extern "C" fn(usize),
	arg: usize,
	prio: Priority,
	stack_size: usize,
	selector: isize,
) -> TaskId {
	static CORE_COUNTER: AtomicU32 = AtomicU32::new(1);

	let core_id = if selector < 0 {
		// use round robin to schedule the cores
		CORE_COUNTER.fetch_add(1, Ordering::SeqCst) % get_processor_count()
	} else {
		selector as u32
	};

	unsafe { PerCoreScheduler::spawn(func, arg, prio, core_id, stack_size) }
}

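/// Blocks the calling task until the task with the given ID has finished.
///
/// Illustrative sketch, assuming `tid` was returned by `spawn` above:
///
/// ```ignore
/// let _ = join(tid);
/// ```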
#[allow(clippy::result_unit_err)]
pub fn join(id: TaskId) -> Result<(), ()> {
	let core_scheduler = core_scheduler();

	debug!(
		"Task {} is waiting for task {}",
		core_scheduler.get_current_task_id(),
		id
	);

	loop {
		let mut waiting_tasks_guard = WAITING_TASKS.lock();

		if let Some(queue) = waiting_tasks_guard.get_mut(&id) {
			queue.push_back(core_scheduler.get_current_task_handle());
			core_scheduler.block_current_task(None);

			// Switch to the next task.
			drop(waiting_tasks_guard);
			core_scheduler.reschedule();
		} else {
			return Ok(());
		}
	}
}

pub fn shutdown(arg: i32) -> ! {
	crate::syscalls::shutdown(arg)
}

fn get_task_handle(id: TaskId) -> Option<TaskHandle> {
	TASKS.lock().get(&id).copied()
}

#[cfg(all(target_arch = "x86_64", feature = "common-os"))]
pub(crate) static BOOT_ROOT_PAGE_TABLE: OnceCell<usize> = OnceCell::new();

#[cfg(all(target_arch = "x86_64", feature = "common-os"))]
pub(crate) fn get_root_page_table() -> usize {
	let current_task_borrowed = core_scheduler().current_task.borrow_mut();
	current_task_borrowed.root_page_table
}