Skip to content

Commit

Permalink
Merge branch 'cherry-pick-for-0.14.0-rc.3' into 'release-0.14'
Browse files Browse the repository at this point in the history
Cherry-pick changes for 0.14.0-rc.3 release

See merge request nvidia/kubernetes/device-plugin!281
  • Loading branch information
Evan Lezar committed Mar 28, 2023
2 parents 59df9dd + d3a3de5 commit 9292e66
Show file tree
Hide file tree
Showing 17 changed files with 639 additions and 216 deletions.
14 changes: 10 additions & 4 deletions api/config/v1/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

package v1

import (
cdiapi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
)

// Constants related to resource names
const (
ResourceNamePrefix = "nvidia.com"
Expand All @@ -32,8 +36,9 @@ const (

// Constants to represent the various device list strategies
const (
DeviceListStrategyEnvvar = "envvar"
DeviceListStrategyVolumeMounts = "volume-mounts"
DeviceListStrategyEnvvar = "envvar"
DeviceListStrategyVolumeMounts = "volume-mounts"
DeviceListStrategyCDIAnnotations = "cdi-annotations"
)

// Constants to represent the various device id strategies
Expand All @@ -44,6 +49,7 @@ const (

// Constants related to generating CDI specifications
const (
DefaultNvidiaCTKPath = "/usr/bin/nvidia-ctk"
DefaultDriverRootCtrPath = "/driver-root"
DefaultCDIAnnotationPrefix = cdiapi.AnnotationPrefix
DefaultNvidiaCTKPath = "/usr/bin/nvidia-ctk"
DefaultContainerDriverRoot = "/driver-root"
)
22 changes: 12 additions & 10 deletions api/config/v1/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ func updateFromCLIFlag[T any](pflag **T, c *cli.Context, flagName string) {
switch flag := any(pflag).(type) {
case **string:
*flag = ptr(c.String(flagName))
case **[]string:
*flag = ptr(c.StringSlice(flagName))
case **bool:
*flag = ptr(c.Bool(flagName))
case **Duration:
Expand All @@ -57,12 +59,12 @@ type CommandLineFlags struct {

// PluginCommandLineFlags holds the list of command line flags specific to the device plugin.
type PluginCommandLineFlags struct {
PassDeviceSpecs *bool `json:"passDeviceSpecs" yaml:"passDeviceSpecs"`
DeviceListStrategy *string `json:"deviceListStrategy" yaml:"deviceListStrategy"`
DeviceIDStrategy *string `json:"deviceIDStrategy" yaml:"deviceIDStrategy"`
CDIEnabled *bool `json:"CDIEnabled" yaml:"CDIEnabled"`
NvidiaCTKPath *string `json:"nvidiaCTKPath" yaml:"nvidiaCTKPath"`
DriverRootCtrPath *string `json:"driverRootCtrPath" yaml:"driverRootCtrPath"`
PassDeviceSpecs *bool `json:"passDeviceSpecs" yaml:"passDeviceSpecs"`
DeviceListStrategy *[]string `json:"deviceListStrategy" yaml:"deviceListStrategy"`
DeviceIDStrategy *string `json:"deviceIDStrategy" yaml:"deviceIDStrategy"`
CDIAnnotationPrefix *string `json:"cdiAnnotationPrefix" yaml:"cdiAnnotationPrefix"`
NvidiaCTKPath *string `json:"nvidiaCTKPath" yaml:"nvidiaCTKPath"`
ContainerDriverRoot *string `json:"containerDriverRoot" yaml:"containerDriverRoot"`
}

// GFDCommandLineFlags holds the list of command line flags specific to GFD.
Expand Down Expand Up @@ -102,12 +104,12 @@ func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) {
updateFromCLIFlag(&f.Plugin.DeviceListStrategy, c, n)
case "device-id-strategy":
updateFromCLIFlag(&f.Plugin.DeviceIDStrategy, c, n)
case "cdi-enabled":
updateFromCLIFlag(&f.Plugin.CDIEnabled, c, n)
case "cdi-annotation-prefix":
updateFromCLIFlag(&f.Plugin.CDIAnnotationPrefix, c, n)
case "nvidia-ctk-path":
updateFromCLIFlag(&f.Plugin.NvidiaCTKPath, c, n)
case "driver-root-ctr-path":
updateFromCLIFlag(&f.Plugin.DriverRootCtrPath, c, n)
case "container-driver-root":
updateFromCLIFlag(&f.Plugin.ContainerDriverRoot, c, n)
}
// GFD specific flags
if f.GFD == nil {
Expand Down
58 changes: 58 additions & 0 deletions api/config/v1/strategy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c), NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package v1

import (
"fmt"
"strings"
)

// DeviceListStrategies defines which strategies are enabled and should
// be used when passing the device list to the container runtime.
//
// It maps a strategy name to a flag that is true when the strategy is
// enabled. A name that is not present in the map reads as disabled.
type DeviceListStrategies map[string]bool

// NewDeviceListStrategies builds a DeviceListStrategies set from a list of
// strategy names.
//
// Every known strategy (envvar, volume-mounts, cdi-annotations) starts out
// disabled, and each name in strategies switches its entry on. Repeated names
// are harmless. The first name that is not a known strategy aborts the
// construction: a nil set and an error naming the offending value are
// returned.
func NewDeviceListStrategies(strategies []string) (DeviceListStrategies, error) {
	// Seeding the set with all known names lets the same map double as the
	// list of valid strategies during validation below.
	enabled := DeviceListStrategies{
		DeviceListStrategyEnvvar:         false,
		DeviceListStrategyVolumeMounts:   false,
		DeviceListStrategyCDIAnnotations: false,
	}

	for _, name := range strategies {
		_, known := enabled[name]
		if !known {
			return nil, fmt.Errorf("invalid strategy: %v", name)
		}
		enabled[name] = true
	}

	return enabled, nil
}

// Includes reports whether strategy is enabled in the set. A strategy that is
// absent from the set, or present but switched off, yields false.
func (s DeviceListStrategies) Includes(strategy string) bool {
	enabled, present := s[strategy]
	return present && enabled
}

// IsCDIEnabled reports whether at least one enabled strategy requires CDI.
// A strategy counts as CDI-based when its name starts with "cdi-".
func (s DeviceListStrategies) IsCDIEnabled() bool {
	for name, enabled := range s {
		if !enabled {
			continue
		}
		if strings.HasPrefix(name, "cdi-") {
			return true
		}
	}
	return false
}
31 changes: 14 additions & 17 deletions cmd/nvidia-device-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ func main() {
Usage: "pass the list of DeviceSpecs to the kubelet on Allocate()",
EnvVars: []string{"PASS_DEVICE_SPECS"},
},
&cli.StringFlag{
&cli.StringSliceFlag{
Name: "device-list-strategy",
Value: spec.DeviceListStrategyEnvvar,
Usage: "the desired strategy for passing the device list to the underlying runtime:\n\t\t[envvar | volume-mounts]",
Value: cli.NewStringSlice(string(spec.DeviceListStrategyEnvvar)),
Usage: "the desired strategy for passing the device list to the underlying runtime:\n\t\t[envvar | volume-mounts | cdi-annotations]",
EnvVars: []string{"DEVICE_LIST_STRATEGY"},
},
&cli.StringFlag{
Expand All @@ -98,11 +98,11 @@ func main() {
Destination: &configFile,
EnvVars: []string{"CONFIG_FILE"},
},
&cli.BoolFlag{
Name: "cdi-enabled",
Value: false,
Usage: "enable the generation of a CDI specification; use CDI annotations when passing the device list to the underlying runtime",
EnvVars: []string{"CDI_ENABLED"},
&cli.StringFlag{
Name: "cdi-annotation-prefix",
Value: spec.DefaultCDIAnnotationPrefix,
Usage: "the prefix to use for CDI container annotation keys",
EnvVars: []string{"CDI_ANNOTATION_PREFIX"},
},
&cli.StringFlag{
Name: "nvidia-ctk-path",
Expand All @@ -111,10 +111,10 @@ func main() {
EnvVars: []string{"NVIDIA_CTK_PATH"},
},
&cli.StringFlag{
Name: "driver-root-ctr-path",
Value: spec.DefaultDriverRootCtrPath,
Name: "container-driver-root",
Value: spec.DefaultContainerDriverRoot,
Usage: "the path where the NVIDIA driver root is mounted in the container; used for generating CDI specifications",
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
EnvVars: []string{"CONTAINER_DRIVER_ROOT"},
},
}

Expand All @@ -126,12 +126,9 @@ func main() {
}

func validateFlags(config *spec.Config) error {
allowedDeviceListStrategy := map[string]bool{
spec.DeviceListStrategyEnvvar: true,
spec.DeviceListStrategyVolumeMounts: true,
}
if !allowedDeviceListStrategy[*config.Flags.Plugin.DeviceListStrategy] {
return fmt.Errorf("invalid --device-list-strategy option: %v", *config.Flags.Plugin.DeviceListStrategy)
_, err := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
if err != nil {
return fmt.Errorf("invalid --device-list-strategy option: %v", err)
}

if *config.Flags.Plugin.DeviceIDStrategy != spec.DeviceIDStrategyUUID && *config.Flags.Plugin.DeviceIDStrategy != spec.DeviceIDStrategyIndex {
Expand Down
13 changes: 10 additions & 3 deletions cmd/nvidia-device-plugin/plugin-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,16 @@ func NewPluginManager(config *spec.Config) (manager.Interface, error) {

nvmllib := nvml.New()

deviceListStrategies, err := spec.NewDeviceListStrategies(*config.Flags.Plugin.DeviceListStrategy)
if err != nil {
return nil, fmt.Errorf("invalid device list strategy: %v", err)
}

cdiEnabled := deviceListStrategies.IsCDIEnabled()

cdiHandler, err := cdi.New(
cdi.WithEnabled(*config.Flags.Plugin.CDIEnabled),
cdi.WithDriverRoot(*config.Flags.Plugin.DriverRootCtrPath),
cdi.WithEnabled(cdiEnabled),
cdi.WithDriverRoot(*config.Flags.Plugin.ContainerDriverRoot),
cdi.WithTargetDriverRoot(*config.Flags.NvidiaDriverRoot),
cdi.WithNvidiaCTKPath(*config.Flags.Plugin.NvidiaCTKPath),
cdi.WithNvml(nvmllib),
Expand All @@ -55,7 +62,7 @@ func NewPluginManager(config *spec.Config) (manager.Interface, error) {

m, err := manager.New(
manager.WithNVML(nvmllib),
manager.WithCDIEnabled(*config.Flags.Plugin.CDIEnabled),
manager.WithCDIEnabled(cdiEnabled),
manager.WithCDIHandler(cdiHandler),
manager.WithConfig(config),
manager.WithFailOnInitError(*config.Flags.FailOnInitError),
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,9 @@ require (
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.7.0
github.com/urfave/cli/v2 v2.4.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230314181732-4ea7dac0fae0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230327171225-18ad7cd513cf
golang.org/x/net v0.7.0
golang.org/x/sys v0.5.0
google.golang.org/grpc v1.29.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.19.1
k8s.io/apimachinery v0.19.1
k8s.io/client-go v0.19.1
Expand Down Expand Up @@ -75,13 +73,15 @@ require (
golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6 // indirect
golang.org/x/sys v0.5.0 // indirect
golang.org/x/term v0.5.0 // indirect
golang.org/x/text v0.7.0 // indirect
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 // indirect
google.golang.org/appengine v1.6.5 // indirect
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
google.golang.org/protobuf v1.27.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/utils v0.0.0-20200729134348-d5654de09c73 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.0.1 // indirect
Expand Down
8 changes: 2 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VK
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20201109160820-d08ea3cdcce4/go.mod h1:l0Cq257MSJMvg9URCXUjc8pgKY2SK1oSvIx6qG0bzzc=
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20201222072828-352eb4c503a7 h1:arlBTKD1OAGOTTzji1maOVbzii45LjdlvhZFg3vJeYk=
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20201222072828-352eb4c503a7/go.mod h1:l0Cq257MSJMvg9URCXUjc8pgKY2SK1oSvIx6qG0bzzc=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.1.0.20230228180429-1e9b7883cf0d h1:SXTXtdPKdyB2/2HYbrL4i/RWdWXfCvN2bDCWfuwNXN0=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.1.0.20230228180429-1e9b7883cf0d/go.mod h1:hpT9RjmZ4jIdgcaHy/sWxrHGBYl6Iuk+LlnaISQltho=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.1.0.20230306111649-f36c514f1fc9 h1:I1BlQoITbWdLk26HRJX0r6AxppViUnH7mjTseoYPAQ8=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.1.0.20230306111649-f36c514f1fc9/go.mod h1:hpT9RjmZ4jIdgcaHy/sWxrHGBYl6Iuk+LlnaISQltho=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.2.0.20230320080859-1962fd68df0e h1:nT5iDLtRbrnDMuwdzoc6xaKgf5R098jevRP0+wAlMT0=
github.com/NVIDIA/nvidia-container-toolkit v1.13.0-rc.2.0.20230320080859-1962fd68df0e/go.mod h1:hpT9RjmZ4jIdgcaHy/sWxrHGBYl6Iuk+LlnaISQltho=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
Expand Down Expand Up @@ -634,8 +630,8 @@ github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1/go.mod h1:QcJo0QPSf
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230314181732-4ea7dac0fae0 h1:ZeMqI2Ll6HjwvoPriIa+XLinpICrl8njougXBkaI95c=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230314181732-4ea7dac0fae0/go.mod h1:b1w+5pfSylVIgnjO8l0ixvRoUs6Lgd+Y/vkImfVKymE=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230327171225-18ad7cd513cf h1:mq26GXnc6zbsM7DD6Wmv0aQdcfXaRfQxLMmRj5G7ITY=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230327171225-18ad7cd513cf/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
Expand Down
Loading

0 comments on commit 9292e66

Please sign in to comment.