Skip to content

Commit

Permalink
Merge pull request #6062 from oasisprotocol/peternose/bugfix/km-cleanup
Browse files Browse the repository at this point in the history
go/runtime/bundle/manager: Cleanup key manager bundles on startup
  • Loading branch information
peternose authored Feb 12, 2025
2 parents fbdc523 + 0fed5e5 commit b330bac
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 28 deletions.
1 change: 1 addition & 0 deletions .changelog/6062.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
go/runtime/bundle/manager: Cleanup key manager bundles on startup
45 changes: 30 additions & 15 deletions go/runtime/bundle/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,17 @@ func (m *Manager) Add(path string) error {
return err
}

if manifest.ID.IsKeyManager() {
if err = m.removeBundle(manifest.ExplodedDataDir); err != nil {
m.logger.Error("failed to remove bundle",
"err", err,
"path", path,
)
}
m.logger.Warn("not allowed to add key manager bundle")
return fmt.Errorf("not allowed to add key manager bundle")
}

if err := m.registerManifest(manifest); err != nil {
m.logger.Error("failed to register manifest",
"err", err,
Expand All @@ -243,6 +254,12 @@ func (m *Manager) Download(runtimeID common.Namespace, manifestHashes []hash.Has
return
}

// Key managers must configure the bundle statically to prevent
// running different versions of the runtime.
if runtimeID.IsKeyManager() {
return
}

// Download bundles only if at least one endpoint is configured.
if len(m.globalBaseURLs) == 0 && len(m.runtimeBaseURLs[runtimeID]) == 0 {
return
Expand Down Expand Up @@ -281,6 +298,11 @@ func (m *Manager) Download(runtimeID common.Namespace, manifestHashes []hash.Has
//
// Warning: If clean-up fails it's not retried.
func (m *Manager) Cleanup(runtimeID common.Namespace, version version.Version) {
// Key managers are allowed to run obsolete runtime versions.
if runtimeID.IsKeyManager() {
return
}

m.mu.Lock()
defer m.mu.Unlock()

Expand Down Expand Up @@ -538,23 +560,20 @@ func (m *Manager) loadManifests() ([]*ExplodedManifest, error) {
func (m *Manager) cleanOnStartup(manifests, exploded []*ExplodedManifest) ([]*ExplodedManifest, error) {
m.logger.Info("cleaning bundles")

detached := make(map[hash.Hash]struct{})
explodedHashes := make(map[hash.Hash]struct{})
for _, manifest := range exploded {
if manifest.IsDetached() {
detached[manifest.Hash()] = struct{}{}
}
explodedHashes[manifest.Hash()] = struct{}{}
}

shouldKeep := func(manifest *ExplodedManifest) bool {
if _, ok := m.runtimeIDs[manifest.ID]; !ok {
return false
}
if manifest.IsDetached() {
if _, ok := detached[manifest.Hash()]; !ok {
if manifest.IsDetached() || manifest.ID.IsKeyManager() {
if _, ok := explodedHashes[manifest.Hash()]; !ok {
return false
}
}

return true
}

Expand All @@ -566,7 +585,7 @@ func (m *Manager) cleanOnStartup(manifests, exploded []*ExplodedManifest) ([]*Ex
}

if err := m.removeBundle(manifest.ExplodedDataDir); err != nil {
return nil, err
return nil, fmt.Errorf("failed to remove bundle: %w", err)
}
}

Expand Down Expand Up @@ -624,13 +643,13 @@ func (m *Manager) cleanBundle(manifest *ExplodedManifest) {
)

if ok := m.store.RemoveManifest(manifest.Hash()); !ok {
m.logger.Debug("failed to remove manifest from store",
m.logger.Warn("failed to remove manifest from store",
"manifest_hash", manifest.Hash(),
)
}

if err := m.removeBundle(manifest.ExplodedDataDir); err != nil {
m.logger.Error("failed to remove exploded bundle",
m.logger.Error("failed to remove bundle",
"err", err,
)
}
Expand All @@ -642,11 +661,7 @@ func (m *Manager) removeBundle(dir string) error {
)

if err := os.RemoveAll(dir); err != nil {
m.logger.Info("failed to remove bundle",
"err", err,
"path", dir,
)
return fmt.Errorf("failed to remove bundle: %w", err)
return err
}

m.logger.Info("bundle removed",
Expand Down
15 changes: 2 additions & 13 deletions go/runtime/registry/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,7 @@ func (r *runtime) run(ctx context.Context) {
continue
}

// Support cleanup of runtime bundles only for compute nodes.
// Key managers are allowed to run obsolete runtime versions.
if !rt.IsCompute() {
continue
}

// Cleanup runtime bundles.
active := rt.ActiveDeployment(epoch)
if active == nil {
continue
Expand Down Expand Up @@ -399,13 +394,7 @@ func (r *runtime) run(ctx context.Context) {
}
}

// Support hot-loading of runtime bundles only for compute nodes.
// Key managers must configure the bundle statically to prevent
// running different versions of the runtime.
if !rt.IsCompute() {
continue
}

// Hot-load runtime bundles.
now, err := r.consensus.Beacon().GetEpoch(ctx, consensus.HeightLatest)
if err != nil {
r.logger.Error("failed to get current epoch",
Expand Down

0 comments on commit b330bac

Please sign in to comment.