From 80441925e92d4d97af661418fd652e5ebdf4cb43 Mon Sep 17 00:00:00 2001 From: Brian Hie Date: Sun, 18 Feb 2024 16:03:00 +0000 Subject: [PATCH 1/2] add option to have gelu activations --- src/layers.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/layers.py b/src/layers.py index 436b5f2..518fe70 100644 --- a/src/layers.py +++ b/src/layers.py @@ -41,7 +41,14 @@ def __init__( super().__init__() multiple_of = config.get("inner_size_multiple_of", 64) - self.act = F.silu + self.act_type = config.get("mlp_activation", "gelu") + if self.act_type == "gelu": + self.act = F.gelu + elif self.act_type == "silu": + self.act = F.silu + else: + raise NotImplementedError + self.multiple_of = multiple_of * config.model_parallel_size inner_size = int(2 * config.hidden_size * 4 / 3) From ca6401c94169a7997e21a1e16b74c3f4301ada7d Mon Sep 17 00:00:00 2001 From: Brian Hie Date: Sun, 18 Feb 2024 23:36:04 +0000 Subject: [PATCH 2/2] default to silu --- src/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers.py b/src/layers.py index 518fe70..b9d518b 100644 --- a/src/layers.py +++ b/src/layers.py @@ -41,7 +41,7 @@ def __init__( super().__init__() multiple_of = config.get("inner_size_multiple_of", 64) - self.act_type = config.get("mlp_activation", "gelu") + self.act_type = config.get("mlp_activation", "silu") if self.act_type == "gelu": self.act = F.gelu elif self.act_type == "silu":