patch-2.0.19 linux/kernel/sched.c


diff -u --recursive --new-file v2.0.18/linux/kernel/sched.c linux/kernel/sched.c
@@ -482,17 +482,88 @@
 	printk("       *q = %p\n",*q);
 }
 
+/*
+ * Semaphores are implemented using a two-way counter:
+ * the "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the
+ * "waiting" variable is incremented _while_ the process
+ * is sleeping on that semaphore.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test whether they need to do any extra work
+ * (up needs to do something only if count was negative
+ * before the increment operation).
+ */
+static inline void normalize_semaphore(struct semaphore *sem)
+{
+	atomic_add(xchg(&sem->waiting,0), &sem->count);
+}
+
+/*
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody. In
+ * most cases "waiting" will be positive, and the normalization
+ * will allow things to continue. However, if somebody has
+ * /just/ done a down(), it may be that count was negative
+ * without waiting being positive (or in the generic case
+ * "count is more negative than waiting is positive"), and
+ * the waiter needs to check this itself (see __down).
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+inline void __up(struct semaphore *sem)
+{
+	normalize_semaphore(sem);
+	wake_up(&sem->wait);
+}
+
 void __down(struct semaphore * sem)
 {
-	struct wait_queue wait = { current, NULL };
+	struct task_struct *tsk = current;
+	struct wait_queue wait = { tsk, NULL };
+
+	/*
+	 * The order here is important. We add ourselves to the
+	 * wait queues and mark ourselves sleeping _first_. That
+	 * way, if an "up()" comes in here, we'll either get
+	 * woken up (up happens after the wait queues are set up)
+	 * OR we'll have "waiting > 0".
+	 */
+	tsk->state = TASK_UNINTERRUPTIBLE;
 	add_wait_queue(&sem->wait, &wait);
-	current->state = TASK_UNINTERRUPTIBLE;
-	while (sem->count <= 0) {
-		schedule();
-		current->state = TASK_UNINTERRUPTIBLE;
+	atomic_inc(&sem->waiting);
+
+	/*
+	 * Ok, we're set up. The only race here is really that
+	 * an "up()" might have incremented count before we got
+	 * here, so we check "count+waiting". If that is larger
+	 * than zero, we shouldn't sleep, but re-try the lock.
+	 */
+	if (sem->count+sem->waiting <= 0) {
+		/*
+		 * If "count+waiting" <= 0, we have to wait for
+		 * an up(), which will normalize the count.
+		 * Remember, at this point we have decremented
+		 * count and incremented waiting, so if count is
+		 * zero or positive we need to return to re-try
+		 * the lock.  It _may_ be that both count and
+		 * waiting are zero and that it is still locked,
+		 * but we still want to re-try the lock in that
+		 * case to make count go negative again so that
+		 * the optimized "up()" wake_up sequence works.
+		 */
+		do {
+			schedule();
+			tsk->state = TASK_UNINTERRUPTIBLE;
+		} while (sem->count < 0);
 	}
-	current->state = TASK_RUNNING;
+	tsk->state = TASK_RUNNING;
 	remove_wait_queue(&sem->wait, &wait);
+	normalize_semaphore(sem);
 }
 
 static inline void __sleep_on(struct wait_queue **p, int state)

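A note on the fast paths (an editorial sketch, not part of the patch):
the comments above lean on the inline up() and down() in
<asm/semaphore.h>, which in 2.0.19 are hand-written assembly around
lock-prefixed decrement/increment instructions.  The stand-alone C
program below illustrates the same logic with C11 atomics standing in
for those instructions; the field types, the stubbed-out slow paths and
the main() demo are all assumptions made for illustration.

#include <stdatomic.h>
#include <stdio.h>

struct semaphore {
	atomic_int count;	/* > 0: free, <= 0: held or contended */
	atomic_int waiting;	/* incremented while a process sleeps */
};

/* Slow paths stubbed out; the patch above shows the real ones. */
static void __down(struct semaphore *sem) { (void)sem; }
static void __up(struct semaphore *sem) { (void)sem; }

static void down(struct semaphore *sem)
{
	/*
	 * fetch_sub returns the old value: old <= 0 means the
	 * semaphore was already held.  The real kernel sleeps in
	 * __down() and re-tries the decrement afterwards; the
	 * stub here just falls through to the re-try.
	 */
	while (atomic_fetch_sub(&sem->count, 1) <= 0)
		__down(sem);
}

static void up(struct semaphore *sem)
{
	/*
	 * Only if the old count was negative can anybody be
	 * waiting, so only then pay for the __up() call.
	 */
	if (atomic_fetch_add(&sem->count, 1) < 0)
		__up(sem);
}

int main(void)
{
	struct semaphore sem = { 1, 0 };	/* free mutex */

	down(&sem);	/* count: 1 -> 0, old value 1, fast path */
	up(&sem);	/* count: 0 -> 1, old value 0, fast path */
	printf("count=%d waiting=%d\n",
	       atomic_load(&sem.count), atomic_load(&sem.waiting));
	return 0;
}

Built with any C11 compiler, neither slow path is reached and the
program prints "count=1 waiting=0", matching the comment's claim that
up() needs to do extra work only if count was negative beforehand.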
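As a worked trace of the two-way counter (again editorial, derived from
the patch above, assuming a mutex initialized to count = 1,
waiting = 0): process A's down() takes the fast path (count 1 -> 0).
Process B's down() sees the old count 0, leaving count = -1, and enters
__down: B marks itself TASK_UNINTERRUPTIBLE, joins the wait queue,
bumps waiting to 1, finds count + waiting = 0, and sleeps.  A's up()
moves count from -1 to 0 and, since the old value was negative, calls
__up: normalize_semaphore() folds waiting back into count (count = 1,
waiting = 0) and wake_up() rouses B.  B's "while (sem->count < 0)" test
now fails, so it cleans up, returns, and re-tries the decrement, taking
the lock with count back at 0.  The race the comments worry about is
the window between B's decrement and its waiting++: if A's up() lands
there, normalization finds waiting still 0, but B then sees
count + waiting = 1 > 0 and re-tries instead of sleeping, which is
exactly why __down checks the sum rather than count alone.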