RateLimiter - consider whole operation execution time #251

Merged · 11 commits · Dec 17, 2024
74 changes: 58 additions & 16 deletions core/src/main/scala/ox/resilience/RateLimiter.scala
@@ -3,28 +3,52 @@ package ox.resilience
import scala.concurrent.duration.FiniteDuration
import ox.*

import java.util.concurrent.Semaphore
import scala.annotation.tailrec

/** Rate limiter with a customizable algorithm. Operations can be blocked or dropped, when the rate limit is reached. */
class RateLimiter private (algorithm: RateLimiterAlgorithm):
/** Rate limiter with a customizable algorithm. Operations can be blocked or dropped when the rate limit is reached. The `operationMode`
* decides whether the whole execution time of an operation should be considered, or just its start.
*/
class RateLimiter private (algorithm: RateLimiterAlgorithm, operationMode: RateLimiterMode):
private val semaphore = new Semaphore(algorithm.rate)

/** Runs the operation, blocking if the rate limit is reached, until the rate limiter is replenished. */
def runBlocking[T](operation: => T): T =
algorithm.acquire()
operation
operationMode match
case RateLimiterMode.OperationStart =>
algorithm.acquire()
operation
case RateLimiterMode.OperationDuration =>
semaphore.acquire()
algorithm.acquire()
val result = operation
semaphore.release()
result

/** Runs the operation, or drops it if the rate limit is reached.
*
* @return
* `Some` if the operation has been allowed to run, `None` if the operation has been dropped.
*/
def runOrDrop[T](operation: => T): Option[T] =
if algorithm.tryAcquire() then Some(operation)
else None
operationMode match
case RateLimiterMode.OperationStart =>
if algorithm.tryAcquire() then Some(operation)
else None
case RateLimiterMode.OperationDuration =>
val acquired = semaphore.tryAcquire()
if acquired && algorithm.tryAcquire() then
val result = operation
semaphore.release()
Some(result)
else
if acquired then semaphore.release()
None

end RateLimiter

object RateLimiter:
def apply(algorithm: RateLimiterAlgorithm)(using Ox): RateLimiter =
def apply(algorithm: RateLimiterAlgorithm, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)(using Ox): RateLimiter =
@tailrec
def update(): Unit =
val waitTime = algorithm.getNextUpdate
@@ -36,7 +60,7 @@ object RateLimiter:
end update

forkDiscard(update())
new RateLimiter(algorithm)
new RateLimiter(algorithm, operationMode)
end apply

/** Creates a rate limiter using a fixed window algorithm.
@@ -46,11 +70,15 @@
* @param maxOperations
* Maximum number of operations that are allowed to **start** within a time [[window]].
* @param window
* Interval of time between replenishing the rate limiter. THe rate limiter is replenished to allow up to [[maxOperations]] in the next
* Interval of time between replenishing the rate limiter. The rate limiter is replenished to allow up to [[maxOperations]] in the next
* time window.
* @param operationMode
* Whether to consider the whole execution time of the operation, or just its start.
*/
def fixedWindow(maxOperations: Int, window: FiniteDuration)(using Ox): RateLimiter =
apply(RateLimiterAlgorithm.FixedWindow(maxOperations, window))
def fixedWindow(maxOperations: Int, window: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)(using
Ox
): RateLimiter =
apply(RateLimiterAlgorithm.FixedWindow(maxOperations, window), operationMode)

/** Creates a rate limiter using a sliding window algorithm.
*
@@ -60,19 +88,33 @@
* Maximum number of operations that are allowed to **start** within any [[window]] of time.
* @param window
* Length of the window.
* @param operationMode
* Whether to consider the whole execution time of the operation, or just its start.
*/
def slidingWindow(maxOperations: Int, window: FiniteDuration)(using Ox): RateLimiter =
apply(RateLimiterAlgorithm.SlidingWindow(maxOperations, window))
def slidingWindow(maxOperations: Int, window: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)(using
Ox
): RateLimiter =
apply(RateLimiterAlgorithm.SlidingWindow(maxOperations, window), operationMode)

/** Rate limiter with token/leaky bucket algorithm.
/** Creates a rate limiter with token/leaky bucket algorithm.
*
* Must be run within an [[Ox]] concurrency scope, as a background fork is created, to replenish the rate limiter.
*
* @param maxTokens
* Max capacity of tokens in the algorithm, limiting the operations that are allowed to **start** concurrently.
* @param refillInterval
* Interval of time between adding a single token to the bucket.
* @param operationMode
* Whether to consider the whole execution time of the operation, or just its start.
*/
def leakyBucket(maxTokens: Int, refillInterval: FiniteDuration)(using Ox): RateLimiter =
apply(RateLimiterAlgorithm.LeakyBucket(maxTokens, refillInterval))
def leakyBucket(maxTokens: Int, refillInterval: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)(using
Ox
): RateLimiter =
apply(RateLimiterAlgorithm.LeakyBucket(maxTokens, refillInterval), operationMode)
end RateLimiter

/** Decides whether the RateLimiter should consider only the start of an operation, or its whole execution time.
*/
enum RateLimiterMode:
case OperationStart
case OperationDuration
4 changes: 3 additions & 1 deletion core/src/main/scala/ox/resilience/RateLimiterAlgorithm.scala
@@ -30,6 +30,8 @@ trait RateLimiterAlgorithm:
/** Returns the time in nanoseconds that needs to elapse until the next update. It should not modify internal state. */
def getNextUpdate: Long

/** The maximum rate defined for the algorithm, e.g. the number of operations per window or the bucket capacity. */
def rate: Int

end RateLimiterAlgorithm

object RateLimiterAlgorithm:
@@ -117,7 +119,7 @@ object RateLimiterAlgorithm:

end SlidingWindow

/** Token/leaky bucket algorithm It adds a token to start an new operation each `per` with a maximum number of tokens of `rate`. */
/** Token/leaky bucket algorithm. It adds a token to start a new operation each `per`, with a maximum number of tokens of `rate`. */
case class LeakyBucket(rate: Int, per: FiniteDuration) extends RateLimiterAlgorithm:
private val refillInterval = per.toNanos
private val lastRefillTime = new AtomicLong(System.nanoTime())
73 changes: 73 additions & 0 deletions core/src/test/scala/ox/resilience/RateLimiterTest.scala
@@ -4,9 +4,11 @@ import ox.*
import ox.util.ElapsedTime
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import java.util.concurrent.atomic.AtomicLong
import org.scalatest.{EitherValues, TryValues}
import scala.concurrent.duration.*
import java.util.concurrent.atomic.AtomicReference
import ox.resilience.RateLimiterMode

class RateLimiterTest extends AnyFlatSpec with Matchers with EitherValues with TryValues with ElapsedTime:
behavior of "fixed rate RateLimiter"
@@ -180,6 +182,77 @@ class RateLimiterTest extends AnyFlatSpec with Matchers with EitherValues with T
}
}

it should "allow to run more long running operations concurrently than max rate when not considering operation's time" in {
supervised:
val rateLimiter = RateLimiter.fixedWindow(2, FiniteDuration(1, "second"))

val operationsRunning = AtomicLong(0L)

def operation =
operationsRunning.updateAndGet(_ + 1)
Thread.sleep(3000L)
operationsRunning.updateAndGet(_ - 1)
0
end operation

var result1: Option[Int] = Some(-1)
var result2: Option[Int] = Some(-1)
var result3: Int = -1
var resultOperations: Long = 0L

supervised:
forkUserDiscard:
result1 = rateLimiter.runOrDrop(operation)
forkUserDiscard:
result2 = rateLimiter.runOrDrop(operation)
forkUserDiscard:
result3 = rateLimiter.runBlocking(operation)
forkUserDiscard:
// Wait for the next window so that the 3rd operation can start, then read the number of running operations
Thread.sleep(1500L)
resultOperations = operationsRunning.get()

result1 shouldBe Some(0)
result2 shouldBe Some(0)
result3 shouldBe 0
resultOperations shouldBe 3
}

it should "not allow to run more long running operations concurrently than max rate when considering operation time" in {
supervised:
val rateLimiter = RateLimiter.fixedWindow(2, FiniteDuration(1, "second"), RateLimiterMode.OperationDuration)

val operationsRunning = AtomicLong(0L)

def operation =
operationsRunning.updateAndGet(_ + 1)
Thread.sleep(3000L)
operationsRunning.updateAndGet(_ - 1)
0

var result1: Option[Int] = Some(-1)
var result2: Option[Int] = Some(-1)
var result3: Int = -1
var resultOperations: Long = 0L

supervised:
forkUserDiscard:
result1 = rateLimiter.runOrDrop(operation)
forkUserDiscard:
result2 = rateLimiter.runOrDrop(operation)
forkUserDiscard:
result3 = rateLimiter.runBlocking(operation)
forkUserDiscard:
// Wait for the next window so that the 3rd operation can start, then read the number of running operations
Thread.sleep(1500L)
resultOperations = operationsRunning.get()

result1 shouldBe Some(0)
result2 shouldBe Some(0)
result3 shouldBe 0
resultOperations shouldBe 2
}

behavior of "sliding window RateLimiter"
it should "drop operation when rate limit is exceeded" in {
supervised {
11 changes: 6 additions & 5 deletions doc/utils/rate-limiter.md
@@ -1,6 +1,6 @@
# Rate limiter

The rate limiter mechanism allows controlling the rate at which operations are executed. It ensures that at most a certain number of operations are run concurrently within a specified time frame, preventing system overload and ensuring fair resource usage. Note that the implemented limiting mechanism only takes into account the start of execution and not the whole execution of an operation.
The rate limiter mechanism allows controlling the rate at which operations are executed. It ensures that at most a certain number of operations are run concurrently within a specified time frame, preventing system overload and ensuring fair resource usage. Note that you can choose whether the mechanism takes into account only the start of execution, or the whole execution of an operation.
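
For illustration, here is a minimal usage sketch of the two modes' entry point (the imports, the enclosing `supervised` scope and the concrete numbers are assumptions for the example, following the tests in `ox.resilience`):

```scala
import ox.*
import ox.resilience.*
import scala.concurrent.duration.*

supervised:
  // count each operation against the limit for its whole duration, not just its start
  val rateLimiter = RateLimiter.fixedWindow(2, 1.second, RateLimiterMode.OperationDuration)

  // blocks until the rate limiter allows the operation to run
  val blockingResult: Int = rateLimiter.runBlocking { 42 }

  // runs the operation, or returns None if the rate limit is reached
  val droppableResult: Option[Int] = rateLimiter.runOrDrop { 42 }
```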

## API

@@ -33,7 +33,7 @@ The `operation` can be provided directly using a by-name parameter, i.e. `f: =>

## Configuration

The configuration of a `RateLimiter` depends on an underlying algorithm that controls whether an operation can be executed or not. The following algorithms are available:
The configuration of a `RateLimiter` consists of an operation mode, which decides whether the whole execution time of an operation is taken into account, and an underlying algorithm that controls whether an operation can be executed or not (a construction sketch follows the list below). The following algorithms are available:
- `RateLimiterAlgorithm.FixedWindow(rate: Int, dur: FiniteDuration)` - where `rate` is the maximum number of operations to be executed in fixed windows of `dur` duration.
- `RateLimiterAlgorithm.SlidingWindow(rate: Int, dur: FiniteDuration)` - where `rate` is the maximum number of operations to be executed in any window of time of duration `dur`.
- `RateLimiterAlgorithm.Bucket(maximum: Int, dur: FiniteDuration)` - where `maximum` is the maximum capacity of tokens available in the token bucket algorithm and one token is added each `dur`. It can represent both the leaky bucket algorithm or the token bucket algorithm.
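
As a sketch (imports, scope and values are again assumptions, not part of this change), an algorithm instance can be passed directly when constructing the limiter, together with the operation mode:

```scala
import ox.*
import ox.resilience.*
import scala.concurrent.duration.*

supervised:
  // at most 10 operations may start in any 1-second window; only the start is counted
  val limiter = RateLimiter(
    RateLimiterAlgorithm.SlidingWindow(10, 1.second),
    RateLimiterMode.OperationStart
  )
  limiter.runBlocking { println("running") }
```
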
@@ -42,10 +42,11 @@ The configuration of a `RateLimiter` depends on an underlying algorithm that con

You can use one of the following shorthands to define a Rate Limiter with the corresponding algorithm:

- `RateLimiter.fixedWindow(rate: Int, dur: FiniteDuration)`,
- `RateLimiter.slidingWindow(rate: Int, dur: FiniteDuration)`,
- `RateLimiter.leakyBucket(maximum: Int, dur: FiniteDuration)`,
- `RateLimiter.fixedWindow(rate: Int, dur: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)`,
- `RateLimiter.slidingWindow(rate: Int, dur: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)`,
- `RateLimiter.leakyBucket(maximum: Int, dur: FiniteDuration, operationMode: RateLimiterMode = RateLimiterMode.OperationStart)`,

These shorthands also allow defining whether the whole execution time of an operation should be considered; a short sketch is included below.
See the tests in `ox.resilience.*` for more.
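
A short sketch of the duration-counting mode used through a shorthand (values and names are illustrative assumptions):

```scala
import ox.*
import ox.resilience.*
import scala.concurrent.duration.*

supervised:
  // with OperationDuration, no more than 5 operations are executing at the same time
  val limiter = RateLimiter.slidingWindow(5, 1.second, RateLimiterMode.OperationDuration)

  val result: Option[String] = limiter.runOrDrop {
    // a possibly long-running operation; it occupies a slot until it completes
    "done"
  }
  // Some("done") if the operation was allowed to run, None if it was dropped
```
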

## Custom rate limiter algorithms
Expand Down