commit ea41105e137c538f46beddcb5ebbe8df068f69ab
from: mpi <mpi@openbsd.org>
date: Fri May 16 13:40:30 2025 UTC

Use a FIFO, instead of LIFO, queue for passing dead threads to the reaper.

Reduce latency with a huge number of CPUs and jobs.

ok claudio@

commit - e2e082d2b5af4e20cd37089510b0c2b47e51eae4
commit + ea41105e137c538f46beddcb5ebbe8df068f69ab
blob - a516b35f20aaf200b172ea375829fb11085f59c8
blob + aebc361419aa310dfcc2d3e6e34a7635e0560d3e
--- sys/kern/kern_exit.c
+++ sys/kern/kern_exit.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_exit.c,v 1.245 2025/05/02 05:04:38 dlg Exp $	*/
+/*	$OpenBSD: kern_exit.c,v 1.246 2025/05/16 13:40:30 mpi Exp $	*/
 /*	$NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $	*/
 
 /*
@@ -257,7 +257,7 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
         /*
 	 * Remove proc from pidhash chain and allproc so looking
 	 * it up won't work.  We will put the proc on the
-	 * deadproc list later (using the p_hash member), and
+	 * deadproc list later (using the p_runq member), and
 	 * wake up the reaper when we do.  If this is the last
 	 * thread of a process that isn't PS_NOZOMBIE, we'll put
 	 * the process on the zombprocess list below.
@@ -396,21 +396,21 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
 }
 
 /*
- * Locking of this proclist is special; it's accessed in a
+ * Locking of this prochead is special; it's accessed in a
  * critical section of process exit, and thus locking it can't
  * modify interrupt state.  We use a simple spin lock for this
- * proclist.  We use the p_hash member to linkup to deadproc.
+ * prochead.  We use the p_runq member to linkup to deadproc.
  */
 struct mutex deadproc_mutex =
     MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
-struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
+struct prochead deadproc = TAILQ_HEAD_INITIALIZER(deadproc);
 
 /*
  * We are called from sched_idle() once it is safe to schedule the
  * dead process's resources to be freed. So this is not allowed to sleep.
  *
  * We lock the deadproc list, place the proc on that list (using
- * the p_hash member), and wake up the reaper.
+ * the p_runq member), and wake up the reaper.
  */
 void
 exit2(struct proc *p)
@@ -421,7 +421,7 @@ exit2(struct proc *p)
 	mtx_leave(&p->p_p->ps_mtx);
 
 	mtx_enter(&deadproc_mutex);
-	LIST_INSERT_HEAD(&deadproc, p, p_hash);
+	TAILQ_INSERT_TAIL(&deadproc, p, p_runq);
 	mtx_leave(&deadproc_mutex);
 
 	wakeup(&deadproc);
@@ -451,12 +451,12 @@ reaper(void *arg)
 
 	for (;;) {
 		mtx_enter(&deadproc_mutex);
-		while ((p = LIST_FIRST(&deadproc)) == NULL)
+		while ((p = TAILQ_FIRST(&deadproc)) == NULL)
 			msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper",
 			    INFSLP);
 
 		/* Remove us from the deadproc list. */
-		LIST_REMOVE(p, p_hash);
+		TAILQ_REMOVE(&deadproc, p, p_runq);
 		mtx_leave(&deadproc_mutex);
 
 		WITNESS_THREAD_EXIT(p);
blob - 6939c7e6b975eaf79ce4359bea2ed266b1c497e5
blob + 40620163489c1a46a49027c53940729c056b9d15
--- sys/kern/kern_sched.c
+++ sys/kern/kern_sched.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_sched.c,v 1.104 2025/03/10 09:28:56 claudio Exp $	*/
+/*	$OpenBSD: kern_sched.c,v 1.105 2025/05/16 13:40:30 mpi Exp $	*/
 /*
  * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
  *
@@ -95,7 +95,7 @@ sched_init_cpu(struct cpu_info *ci)
 
 	kthread_create_deferred(sched_kthreads_create, ci);
 
-	LIST_INIT(&spc->spc_deadproc);
+	TAILQ_INIT(&spc->spc_deadproc);
 	SIMPLEQ_INIT(&spc->spc_deferred);
 
 	/*
@@ -167,8 +167,8 @@ sched_idle(void *v)
 			mi_switch();
 			SCHED_UNLOCK();
 
-			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
-				LIST_REMOVE(dead, p_hash);
+			while ((dead = TAILQ_FIRST(&spc->spc_deadproc))) {
+				TAILQ_REMOVE(&spc->spc_deadproc, dead, p_runq);
 				exit2(dead);
 			}
 		}
@@ -206,15 +206,13 @@ sched_idle(void *v)
  * stack torn from under us before we manage to switch to another proc.
  * Therefore we have a per-cpu list of dead processes where we put this
  * proc and have idle clean up that list and move it to the reaper list.
- * All this will be unnecessary once we can bind the reaper this cpu
- * and not risk having it switch to another in case it sleeps.
  */
 void
 sched_exit(struct proc *p)
 {
 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 
-	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
+	TAILQ_INSERT_TAIL(&spc->spc_deadproc, p, p_runq);
 
 	tuagg_add_runtime();
 
blob - 2bd5c4e594b815ae613977346a9bd5e3fe8cee75
blob + 970292b034daf9c916453a279bb4be2d6e02ad99
--- sys/sys/sched.h
+++ sys/sys/sched.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: sched.h,v 1.73 2024/07/08 14:46:47 mpi Exp $	*/
+/*	$OpenBSD: sched.h,v 1.74 2025/05/16 13:40:30 mpi Exp $	*/
 /* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */
 
 /*-
@@ -108,7 +108,7 @@ struct smr_entry;
 struct schedstate_percpu {
 	struct proc *spc_idleproc;	/* idle proc for this cpu */
 	TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
-	LIST_HEAD(,proc) spc_deadproc;
+	TAILQ_HEAD(,proc) spc_deadproc;
 	struct timespec spc_runtime;	/* time curproc started running */
 	volatile int spc_schedflags;	/* flags; see below */
 	u_int spc_schedticks;		/* ticks for schedclock() */