synchronization

This commit is contained in:
2025-08-25 16:04:00 +08:00
commit 33f9b3ce46
1951 changed files with 854396 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
version: 2.1
jobs:
build:
working_directory: ~/repo
docker:
- image: cimg/go:1.19.3
steps:
- checkout
- restore_cache:
keys:
- go-mod-v4-{{ checksum "go.sum" }}
- run:
name: Install Dependencies
command: go mod download
- save_cache:
key: go-mod-v4-{{ checksum "go.sum" }}
paths:
- "/go/pkg/mod"
- run:
name: Run tests
command: |
mkdir -p /tmp/test-reports
gotestsum --junitfile /tmp/test-reports/unit-tests.xml
- store_test_results:
path: /tmp/test-reports
- run:
name: binary
command: |
make build-server
# Invoke jobs via workflows
# See: https://circleci.com/docs/2.0/configuration-reference/#workflows
workflows:
sample: # This is the name of the workflow, feel free to change it to better match your workflow.
# Inside the workflow, you define the jobs you want to run.
jobs:
- build

View File

@@ -0,0 +1,14 @@
language: go
go:
- "1.10"
go_import_path: github.com/AliyunContainerService/gpushare-scheduler-extender
# let us have speedy Docker-based Travis workers
sudo: false
script:
- go build -o gpushare-sche-extender cmd/*.go
- go vet ./...
- go test ./...

View File

@@ -0,0 +1,12 @@
FROM golang:1.10-alpine as build
WORKDIR /go/src/github.com/AliyunContainerService/gpushare-scheduler-extender
COPY . .
RUN go build -o /go/bin/gpushare-sche-extender cmd/*.go
FROM alpine
COPY --from=build /go/bin/gpushare-sche-extender /usr/bin/gpushare-sche-extender
CMD ["gpushare-sche-extender"]

gpushare-scheduler-extender/Gopkg.lock (522 lines, generated)
View File

@@ -0,0 +1,522 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
branch = "master"
digest = "1:3a60921f3c99de45d26b372450e666196a123def1edc2fb543c33db4a93813a7"
name = "github.com/comail/colog"
packages = ["."]
pruneopts = "UT"
revision = "fba8e7b1f46c3607f09760ce3880066e7ff57c5a"
[[projects]]
digest = "1:ffe9824d294da03b391f44e1ae8281281b4afc1bdaa9588c9097785e3af10cec"
name = "github.com/davecgh/go-spew"
packages = ["spew"]
pruneopts = "UT"
revision = "8991bc29aa16c548c550c7ff78260e27b9ab7c73"
version = "v1.1.1"
[[projects]]
digest = "1:2cd7915ab26ede7d95b8749e6b1f933f1c6d5398030684e6505940a10f31cfda"
name = "github.com/ghodss/yaml"
packages = ["."]
pruneopts = "UT"
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
version = "v1.0.0"
[[projects]]
digest = "1:34e709f36fd4f868fb00dbaf8a6cab4c1ae685832d392874ba9d7c5dec2429d1"
name = "github.com/gogo/protobuf"
packages = [
"proto",
"sortkeys",
]
pruneopts = "UT"
revision = "636bf0302bc95575d69441b25a2603156ffdddf1"
version = "v1.1.1"
[[projects]]
branch = "master"
digest = "1:1ba1d79f2810270045c328ae5d674321db34e3aae468eb4233883b473c5c0467"
name = "github.com/golang/glog"
packages = ["."]
pruneopts = "UT"
revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"
[[projects]]
branch = "master"
digest = "1:3fb07f8e222402962fa190eb060608b34eddfb64562a18e2167df2de0ece85d8"
name = "github.com/golang/groupcache"
packages = ["lru"]
pruneopts = "UT"
revision = "c65c006176ff7ff98bb916961c7abbc6b0afc0aa"
[[projects]]
digest = "1:4c0989ca0bcd10799064318923b9bc2db6b4d6338dd75f3f2d86c3511aaaf5cf"
name = "github.com/golang/protobuf"
packages = [
"proto",
"ptypes",
"ptypes/any",
"ptypes/duration",
"ptypes/timestamp",
]
pruneopts = "UT"
revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5"
version = "v1.2.0"
[[projects]]
branch = "master"
digest = "1:0bfbe13936953a98ae3cfe8ed6670d396ad81edf069a806d2f6515d7bb6950df"
name = "github.com/google/btree"
packages = ["."]
pruneopts = "UT"
revision = "4030bb1f1f0c35b30ca7009e9ebd06849dd45306"
[[projects]]
branch = "master"
digest = "1:3ee90c0d94da31b442dde97c99635aaafec68d0b8a3c12ee2075c6bdabeec6bb"
name = "github.com/google/gofuzz"
packages = ["."]
pruneopts = "UT"
revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1"
[[projects]]
digest = "1:65c4414eeb350c47b8de71110150d0ea8a281835b1f386eacaa3ad7325929c21"
name = "github.com/googleapis/gnostic"
packages = [
"OpenAPIv2",
"compiler",
"extensions",
]
pruneopts = "UT"
revision = "7c663266750e7d82587642f65e60bc4083f1f84e"
version = "v0.2.0"
[[projects]]
digest = "1:878f0defa9b853f9acfaf4a162ba450a89d0050eff084f9fe7f5bd15948f172a"
name = "github.com/gregjones/httpcache"
packages = [
".",
"diskcache",
]
pruneopts = "UT"
revision = "787624de3eb7bd915c329cba748687a3b22666a6"
[[projects]]
digest = "1:8ec8d88c248041a6df5f6574b87bc00e7e0b493881dad2e7ef47b11dc69093b5"
name = "github.com/hashicorp/golang-lru"
packages = [
".",
"simplelru",
]
pruneopts = "UT"
revision = "20f1fb78b0740ba8c3cb143a61e86ba5c8669768"
version = "v0.5.0"
[[projects]]
digest = "1:8eb1de8112c9924d59bf1d3e5c26f5eaa2bfc2a5fcbb92dc1c2e4546d695f277"
name = "github.com/imdario/mergo"
packages = ["."]
pruneopts = "UT"
revision = "9f23e2d6bd2a77f959b2bf6acdbefd708a83a4a4"
version = "v0.3.6"
[[projects]]
digest = "1:3e551bbb3a7c0ab2a2bf4660e7fcad16db089fdcfbb44b0199e62838038623ea"
name = "github.com/json-iterator/go"
packages = ["."]
pruneopts = "UT"
revision = "1624edc4454b8682399def8740d46db5e4362ba4"
version = "v1.1.5"
[[projects]]
digest = "1:f97285a3b0a496dcf8801072622230d513f69175665d94de60eb042d03387f6c"
name = "github.com/julienschmidt/httprouter"
packages = ["."]
pruneopts = "UT"
revision = "348b672cd90d8190f8240323e372ecd1e66b59dc"
version = "v1.2.0"
[[projects]]
digest = "1:33422d238f147d247752996a26574ac48dcf472976eda7f5134015f06bf16563"
name = "github.com/modern-go/concurrent"
packages = ["."]
pruneopts = "UT"
revision = "bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94"
version = "1.0.3"
[[projects]]
digest = "1:e32bdbdb7c377a07a9a46378290059822efdce5c8d96fe71940d87cb4f918855"
name = "github.com/modern-go/reflect2"
packages = ["."]
pruneopts = "UT"
revision = "4b7aa43c6742a2c18fdef89dd197aaae7dac7ccd"
version = "1.0.1"
[[projects]]
branch = "master"
digest = "1:3bf17a6e6eaa6ad24152148a631d18662f7212e21637c2699bff3369b7f00fa2"
name = "github.com/petar/GoLLRB"
packages = ["llrb"]
pruneopts = "UT"
revision = "53be0d36a84c2a886ca057d34b6aa4468df9ccb4"
[[projects]]
digest = "1:0e7775ebbcf00d8dd28ac663614af924411c868dca3d5aa762af0fae3808d852"
name = "github.com/peterbourgon/diskv"
packages = ["."]
pruneopts = "UT"
revision = "5f041e8faa004a95c88a202771f4cc3e991971e6"
version = "v2.0.1"
[[projects]]
digest = "1:c1b1102241e7f645bc8e0c22ae352e8f0dc6484b6cb4d132fa9f24174e0119e2"
name = "github.com/spf13/pflag"
packages = ["."]
pruneopts = "UT"
revision = "298182f68c66c05229eb03ac171abe6e309ee79a"
version = "v1.0.3"
[[projects]]
branch = "release-branch.go1.11"
digest = "1:3f3a05ae0b95893d90b9b3b5afdb79a9b3d96e4e36e099d841ae602e4aca0da8"
name = "golang.org/x/crypto"
packages = ["ssh/terminal"]
pruneopts = "UT"
revision = "56440b844dfe139a8ac053f4ecac0b20b79058f4"
[[projects]]
branch = "master"
digest = "1:b41f13f4d5dbbd63abe9bf646575e4f83d2637ca243b97ea79b7807242518e8c"
name = "golang.org/x/net"
packages = [
"context",
"http/httpguts",
"http2",
"http2/hpack",
"idna",
]
pruneopts = "UT"
revision = "9b4f9f5ad5197c79fd623a3638e70d8b26cef344"
[[projects]]
branch = "release-branch.go1.11"
digest = "1:a60cae5be8993938498243605b120290533a5208fd5cac81c932afbad3642fb0"
name = "golang.org/x/sys"
packages = [
"unix",
"windows",
]
pruneopts = "UT"
revision = "98c5dad5d1a0e8a73845ecc8897d0bd56586511d"
[[projects]]
digest = "1:a2ab62866c75542dd18d2b069fec854577a20211d7c0ea6ae746072a1dccdd18"
name = "golang.org/x/text"
packages = [
"collate",
"collate/build",
"internal/colltab",
"internal/gen",
"internal/tag",
"internal/triegen",
"internal/ucd",
"language",
"secure/bidirule",
"transform",
"unicode/bidi",
"unicode/cldr",
"unicode/norm",
"unicode/rangetable",
]
pruneopts = "UT"
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
version = "v0.3.0"
[[projects]]
digest = "1:d37b0ef2944431fe9e8ef35c6fffc8990d9e2ca300588df94a6890f3649ae365"
name = "golang.org/x/time"
packages = ["rate"]
pruneopts = "UT"
revision = "f51c12702a4d776e4c1fa9b0fabab841babae631"
[[projects]]
digest = "1:2d1fbdc6777e5408cabeb02bf336305e724b925ff4546ded0fa8715a7267922a"
name = "gopkg.in/inf.v0"
packages = ["."]
pruneopts = "UT"
revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf"
version = "v0.9.1"
[[projects]]
digest = "1:342378ac4dcb378a5448dd723f0784ae519383532f5e70ade24132c4c8693202"
name = "gopkg.in/yaml.v2"
packages = ["."]
pruneopts = "UT"
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
version = "v2.2.1"
[[projects]]
digest = "1:34ffbf9ed5e63a11e4e0aaab597dc36c552da8b5b6bd49d8f73dadd4afd7e677"
name = "k8s.io/api"
packages = [
"admissionregistration/v1alpha1",
"admissionregistration/v1beta1",
"apps/v1",
"apps/v1beta1",
"apps/v1beta2",
"authentication/v1",
"authentication/v1beta1",
"authorization/v1",
"authorization/v1beta1",
"autoscaling/v1",
"autoscaling/v2beta1",
"batch/v1",
"batch/v1beta1",
"batch/v2alpha1",
"certificates/v1beta1",
"core/v1",
"events/v1beta1",
"extensions/v1beta1",
"networking/v1",
"policy/v1beta1",
"rbac/v1",
"rbac/v1alpha1",
"rbac/v1beta1",
"scheduling/v1alpha1",
"scheduling/v1beta1",
"settings/v1alpha1",
"storage/v1",
"storage/v1alpha1",
"storage/v1beta1",
]
pruneopts = "UT"
revision = "2d6f90ab1293a1fb871cf149423ebb72aa7423aa"
version = "kubernetes-1.11.2"
[[projects]]
branch = "release-1.11"
digest = "1:33730fb514340e487c72597b579737e896b51a79117aedd6a4e24b59f3e949dc"
name = "k8s.io/apimachinery"
packages = [
"pkg/api/errors",
"pkg/api/meta",
"pkg/api/resource",
"pkg/apis/meta/internalversion",
"pkg/apis/meta/v1",
"pkg/apis/meta/v1/unstructured",
"pkg/apis/meta/v1beta1",
"pkg/conversion",
"pkg/conversion/queryparams",
"pkg/fields",
"pkg/labels",
"pkg/runtime",
"pkg/runtime/schema",
"pkg/runtime/serializer",
"pkg/runtime/serializer/json",
"pkg/runtime/serializer/protobuf",
"pkg/runtime/serializer/recognizer",
"pkg/runtime/serializer/streaming",
"pkg/runtime/serializer/versioning",
"pkg/selection",
"pkg/types",
"pkg/util/cache",
"pkg/util/clock",
"pkg/util/diff",
"pkg/util/errors",
"pkg/util/framer",
"pkg/util/intstr",
"pkg/util/json",
"pkg/util/mergepatch",
"pkg/util/net",
"pkg/util/runtime",
"pkg/util/sets",
"pkg/util/strategicpatch",
"pkg/util/validation",
"pkg/util/validation/field",
"pkg/util/wait",
"pkg/util/yaml",
"pkg/version",
"pkg/watch",
"third_party/forked/golang/json",
"third_party/forked/golang/reflect",
]
pruneopts = "UT"
revision = "def12e63c512da17043b4f0293f52d1006603d9f"
[[projects]]
digest = "1:8eb81e42ff3f9a1fc252fc38d0b81d904907d1c56fb0e3b2052fb932f6dcd8af"
name = "k8s.io/client-go"
packages = [
"discovery",
"informers",
"informers/admissionregistration",
"informers/admissionregistration/v1alpha1",
"informers/admissionregistration/v1beta1",
"informers/apps",
"informers/apps/v1",
"informers/apps/v1beta1",
"informers/apps/v1beta2",
"informers/autoscaling",
"informers/autoscaling/v1",
"informers/autoscaling/v2beta1",
"informers/batch",
"informers/batch/v1",
"informers/batch/v1beta1",
"informers/batch/v2alpha1",
"informers/certificates",
"informers/certificates/v1beta1",
"informers/core",
"informers/core/v1",
"informers/events",
"informers/events/v1beta1",
"informers/extensions",
"informers/extensions/v1beta1",
"informers/internalinterfaces",
"informers/networking",
"informers/networking/v1",
"informers/policy",
"informers/policy/v1beta1",
"informers/rbac",
"informers/rbac/v1",
"informers/rbac/v1alpha1",
"informers/rbac/v1beta1",
"informers/scheduling",
"informers/scheduling/v1alpha1",
"informers/scheduling/v1beta1",
"informers/settings",
"informers/settings/v1alpha1",
"informers/storage",
"informers/storage/v1",
"informers/storage/v1alpha1",
"informers/storage/v1beta1",
"kubernetes",
"kubernetes/scheme",
"kubernetes/typed/admissionregistration/v1alpha1",
"kubernetes/typed/admissionregistration/v1beta1",
"kubernetes/typed/apps/v1",
"kubernetes/typed/apps/v1beta1",
"kubernetes/typed/apps/v1beta2",
"kubernetes/typed/authentication/v1",
"kubernetes/typed/authentication/v1beta1",
"kubernetes/typed/authorization/v1",
"kubernetes/typed/authorization/v1beta1",
"kubernetes/typed/autoscaling/v1",
"kubernetes/typed/autoscaling/v2beta1",
"kubernetes/typed/batch/v1",
"kubernetes/typed/batch/v1beta1",
"kubernetes/typed/batch/v2alpha1",
"kubernetes/typed/certificates/v1beta1",
"kubernetes/typed/core/v1",
"kubernetes/typed/events/v1beta1",
"kubernetes/typed/extensions/v1beta1",
"kubernetes/typed/networking/v1",
"kubernetes/typed/policy/v1beta1",
"kubernetes/typed/rbac/v1",
"kubernetes/typed/rbac/v1alpha1",
"kubernetes/typed/rbac/v1beta1",
"kubernetes/typed/scheduling/v1alpha1",
"kubernetes/typed/scheduling/v1beta1",
"kubernetes/typed/settings/v1alpha1",
"kubernetes/typed/storage/v1",
"kubernetes/typed/storage/v1alpha1",
"kubernetes/typed/storage/v1beta1",
"listers/admissionregistration/v1alpha1",
"listers/admissionregistration/v1beta1",
"listers/apps/v1",
"listers/apps/v1beta1",
"listers/apps/v1beta2",
"listers/autoscaling/v1",
"listers/autoscaling/v2beta1",
"listers/batch/v1",
"listers/batch/v1beta1",
"listers/batch/v2alpha1",
"listers/certificates/v1beta1",
"listers/core/v1",
"listers/events/v1beta1",
"listers/extensions/v1beta1",
"listers/networking/v1",
"listers/policy/v1beta1",
"listers/rbac/v1",
"listers/rbac/v1alpha1",
"listers/rbac/v1beta1",
"listers/scheduling/v1alpha1",
"listers/scheduling/v1beta1",
"listers/settings/v1alpha1",
"listers/storage/v1",
"listers/storage/v1alpha1",
"listers/storage/v1beta1",
"pkg/apis/clientauthentication",
"pkg/apis/clientauthentication/v1alpha1",
"pkg/apis/clientauthentication/v1beta1",
"pkg/version",
"plugin/pkg/client/auth/exec",
"rest",
"rest/watch",
"tools/auth",
"tools/cache",
"tools/clientcmd",
"tools/clientcmd/api",
"tools/clientcmd/api/latest",
"tools/clientcmd/api/v1",
"tools/metrics",
"tools/pager",
"tools/record",
"tools/reference",
"transport",
"util/buffer",
"util/cert",
"util/connrotation",
"util/flowcontrol",
"util/homedir",
"util/integer",
"util/retry",
"util/workqueue",
]
pruneopts = "UT"
revision = "7d04d0e2a0a1a4d4a1cd6baa432a2301492e4e65"
version = "v8.0.0"
[[projects]]
branch = "feature-serverside-apply"
digest = "1:e0d6dcb28c42a53c7243bb6380badd17f92fbd8488a075a07e984f91a07c0d23"
name = "k8s.io/kube-openapi"
packages = ["pkg/util/proto"]
pruneopts = "UT"
revision = "f442ecb314a3679150c272e2b9713d8deed5955d"
[[projects]]
digest = "1:e8d4cbd6c9b88d5ff69bb90793c3d12d7ce2530ba2681dad7eefef443449dcc5"
name = "k8s.io/kubernetes"
packages = ["pkg/scheduler/api"]
pruneopts = "UT"
revision = "4ed3216f3ec431b140b1d899130a69fc671678f4"
version = "v1.12.1"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
input-imports = [
"github.com/comail/colog",
"github.com/julienschmidt/httprouter",
"k8s.io/api/core/v1",
"k8s.io/apimachinery/pkg/api/errors",
"k8s.io/apimachinery/pkg/apis/meta/v1",
"k8s.io/apimachinery/pkg/types",
"k8s.io/apimachinery/pkg/util/runtime",
"k8s.io/apimachinery/pkg/util/wait",
"k8s.io/client-go/informers",
"k8s.io/client-go/kubernetes",
"k8s.io/client-go/kubernetes/scheme",
"k8s.io/client-go/kubernetes/typed/core/v1",
"k8s.io/client-go/listers/core/v1",
"k8s.io/client-go/tools/cache",
"k8s.io/client-go/tools/clientcmd",
"k8s.io/client-go/tools/record",
"k8s.io/client-go/util/workqueue",
"k8s.io/kubernetes/pkg/scheduler/api",
]
solver-name = "gps-cdcl"
solver-version = 1

View File

@@ -0,0 +1,55 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[[constraint]]
name = "k8s.io/kubernetes"
version = "v1.11.2"
[[constraint]]
name = "k8s.io/apimachinery"
branch = "release-1.11"
[[constraint]]
name = "k8s.io/client-go"
version = "~v8.0.0"
[[override]]
name = "k8s.io/api"
version = "kubernetes-1.11.2"
[[override]]
name = "github.com/gregjones/httpcache"
revision = "787624de3eb7bd915c329cba748687a3b22666a6"
[[override]]
name = "golang.org/x/time"
revision = "f51c12702a4d776e4c1fa9b0fabab841babae631"
[prune]
go-tests = true
unused-packages = true

View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,24 @@
# Definitions
# Supports x86 and arm macOS, and x86 Linux build hosts
DockerBuild = docker build
DockerRun = docker run
ifeq ($(shell uname -p),arm)
DockerBuild = docker buildx build --platform=linux/amd64
DockerRun = docker run --platform=linux/amd64
endif
# Image and version definitions
IMAGE := registry.cn-hangzhou.aliyuncs.com/acs/gpushare-scheduler-extender
GIT_VERSION := $(shell git rev-parse --short=7 HEAD)
COMMIT_ID := $(shell git describe --match=NeVeRmAtCh --abbrev=99 --tags --always --dirty)
GOLANG_DOCKER_IMAGE := golang:1.19
build-server:
go build -o bin/gpushare-sche-extender ./cmd/main.go
build-image:
${DockerBuild} -t ${IMAGE}:${GIT_VERSION} -f scripts/build/Dockerfile .
local-build-image:
GOOS=linux GOARCH=amd64 go build -o bin/gpushare-sche-extender ./cmd/main.go
${DockerBuild} -t ${IMAGE}:${GIT_VERSION} -f scripts/build/Dockerfile-local .
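For reference, the targets above are typically invoked as follows (a sketch; image tags are derived from the current git revision):

```bash
# Sketch: typical invocations of the Makefile targets above.
make build-server        # builds bin/gpushare-sche-extender for the host platform
make build-image         # builds the Docker image; uses buildx --platform=linux/amd64 on arm machines
make local-build-image   # cross-compiles the binary locally, then builds the image from it
```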

View File

@@ -0,0 +1,95 @@
# GPU Sharing Scheduler Extender in Kubernetes
[![CircleCI](https://circleci.com/gh/AliyunContainerService/gpushare-scheduler-extender.svg?style=svg)](https://circleci.com/gh/AliyunContainerService/gpushare-scheduler-extender)
[![Build Status](https://travis-ci.org/AliyunContainerService/gpushare-scheduler-extender.svg?branch=master)](https://travis-ci.org/AliyunContainerService/gpushare-scheduler-extender)
[![Go Report Card](https://goreportcard.com/badge/github.com/AliyunContainerService/gpushare-scheduler-extender)](https://goreportcard.com/report/github.com/AliyunContainerService/gpushare-scheduler-extender)
## Overview
More and more data scientists run their Nvidia GPU based inference tasks on Kubernetes. Some of these tasks can share the same Nvidia GPU device to increase utilization, so one important challenge is how to share GPUs between pods. The community is also very interested in this [topic](https://github.com/kubernetes/kubernetes/issues/52757).
This project provides a GPU sharing solution for native Kubernetes. It is built on the scheduler extender and device plugin mechanisms, so you can reuse it easily in your own Kubernetes cluster.
## Prerequisites
- Kubernetes 1.11+
- golang 1.19+
- NVIDIA drivers ~= 361.93
- Nvidia-docker version > 2.0 (see how to [install](https://github.com/NVIDIA/nvidia-docker) and its [prerequisites](https://github.com/nvidia/nvidia-docker/wiki/Installation-\(version-2.0\)#prerequisites))
- Docker configured with Nvidia as the [default runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Advanced-topics#default-runtime); a sketch of that configuration follows this list.
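The snippet below is a minimal sketch of that Docker configuration, following the nvidia-docker documentation linked above; the runtime path assumes a standard nvidia-container-runtime installation:

```bash
# Sketch: make nvidia the default Docker runtime and restart Docker.
# The runtime path assumes a standard nvidia-container-runtime installation.
cat <<'EOF' | sudo tee /etc/docker/daemon.json
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
EOF
sudo systemctl restart docker
```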
## Design
For more details about the design of this project, please read this [Design document](docs/designs/designs.md).
## Setup
You can follow this [Installation Guide](docs/install.md). If you are using [Alibaba Cloud Kubernetes](https://cn.aliyun.com/product/kubernetes), please follow this [doc](deployer/README.md) to install with Helm Charts.
## User Guide
You can check this [User Guide](docs/userguide.md).
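As a quick illustration of the user-facing API, a pod asks for GPU memory through the `aliyun.com/gpu-mem` extended resource managed by this extender together with the gpushare device plugin; when the device plugin runs with `--memory-unit=GiB`, the value is interpreted in GiB. The sketch below is an assumption-laden example (pod name, image, and sleep command are placeholders):

```bash
# Hypothetical example: request 3 GiB of GPU memory for a pod.
# Pod name and image are placeholders; the resource name comes from the
# scheduler policy config shipped in this repository.
cat <<'EOF' | kubectl create -f -
apiVersion: v1
kind: Pod
metadata:
  name: gpu-share-demo
spec:
  containers:
  - name: demo
    image: nvidia/cuda:10.0-base
    command: ["sleep", "3600"]
    resources:
      limits:
        aliyun.com/gpu-mem: 3
EOF
```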
## Developing
### Scheduler Extender
```bash
git clone https://github.com/AliyunContainerService/gpushare-scheduler-extender.git && cd gpushare-scheduler-extender
make build-image
```
### Device Plugin
```bash
git clone https://github.com/AliyunContainerService/gpushare-device-plugin.git && cd gpushare-device-plugin
docker build -t cheyang/gpushare-device-plugin .
```
### Kubectl Extension
- golang > 1.10
```bash
mkdir -p $GOPATH/src/github.com/AliyunContainerService
cd $GOPATH/src/github.com/AliyunContainerService
git clone https://github.com/AliyunContainerService/gpushare-device-plugin.git
cd gpushare-device-plugin
go build -o $GOPATH/bin/kubectl-inspect-gpushare-v2 cmd/inspect/*.go
```
## Demo
### - Demo 1: Deploy multiple GPU-shared pods and schedule them on the same GPU device in a binpack manner
[![](demo1.jpg)](http://cloud.video.taobao.com//play/u/2987821887/p/2/e/6/t/1/214292079721.mp4)
### - Demo 2: Avoid scheduling pods whose GPU memory requests fit at the node level but not at the GPU device level
[![](demo2.jpg)](http://cloud.video.taobao.com//play/u/2987821887/p/2/e/6/t/1/214235285109.mp4)
## Related Project
- [gpushare device plugin](https://github.com/AliyunContainerService/gpushare-device-plugin.git)
## Roadmap
- Integrate Nvidia MPS as an option for isolation
- Automated deployment for Kubernetes clusters set up by kubeadm
- Scheduler Extender High Availability
- Generic solution for GPU, RDMA and other devices
## Adopters
If you are interested in GPUShare and would like to share your experiences with others, you are warmly welcome to add your information to the [ADOPTERS.md](docs/ADOPTERS.md) page. We will continuously discuss new requirements and feature design with you in advance.
## Acknowledgments
- The GPU sharing solution is based on [Nvidia Docker2](https://github.com/NVIDIA/nvidia-docker), and their [gpu sharing design](https://docs.google.com/document/d/1ZgKH_K4SEfdiE_OfxQ836s4yQWxZfSjS288Tq9YIWCA/edit#heading=h.r88v2xgacqr) is our reference. The Nvidia community is very supportive and we are very grateful.

View File

@@ -0,0 +1,119 @@
package main
import (
"context"
"flag"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"net/http"
"os"
"runtime"
"strconv"
"time"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/gpushare"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/routes"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/scheduler"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils/signals"
"github.com/julienschmidt/httprouter"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
const RecommendedKubeConfigPathEnv = "KUBECONFIG"
var (
clientset *kubernetes.Clientset
resyncPeriod = 30 * time.Second
clientConfig clientcmd.ClientConfig
)
func initKubeClient() {
kubeConfig := ""
if len(os.Getenv(RecommendedKubeConfigPathEnv)) > 0 {
// use the current context in kubeconfig
// This is very useful for running locally.
kubeConfig = os.Getenv(RecommendedKubeConfigPathEnv)
}
// Get kubernetes config.
restConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfig)
if err != nil {
log.Fatal("Error building kubeconfig: %s", err.Error())
}
// create the clientset
clientset, err = kubernetes.NewForConfig(restConfig)
if err != nil {
log.Fatal("fatal: Failed to init rest config due to %v", err)
}
}
func main() {
// Call Parse() to avoid noisy logs
flag.CommandLine.Parse([]string{})
ctx := context.Background()
var logLevel int32 = 10
switch os.Getenv("LOG_LEVEL") {
case "debug":
logLevel = 101
case "info":
logLevel = 50
case "warn":
logLevel = 10
case "error":
logLevel = 5
}
log.NewLoggerWithLevel(logLevel)
threadness := StringToInt(os.Getenv("THREADNESS"))
initKubeClient()
port := os.Getenv("PORT")
if _, err := strconv.Atoi(port); err != nil {
port = "39999"
}
// Set up signals so we handle the first shutdown signal gracefully.
stopCh := signals.SetupSignalHandler()
informerFactory := kubeinformers.NewSharedInformerFactory(clientset, resyncPeriod)
controller, err := gpushare.NewController(clientset, informerFactory, stopCh)
if err != nil {
log.Fatal("Failed to start due to %v", err)
}
err = controller.BuildCache()
if err != nil {
log.Fatal("Failed to start due to %v", err)
}
go controller.Run(threadness, stopCh)
gpusharePredicate := scheduler.NewGPUsharePredicate(clientset, controller.GetSchedulerCache())
gpushareBind := scheduler.NewGPUShareBind(ctx, clientset, controller.GetSchedulerCache())
gpushareInspect := scheduler.NewGPUShareInspect(controller.GetSchedulerCache())
router := httprouter.New()
routes.AddPProf(router)
routes.AddVersion(router)
routes.AddPredicate(router, gpusharePredicate)
routes.AddBind(router, gpushareBind)
routes.AddInspect(router, gpushareInspect)
log.V(3).Info("server starting on port :%s", port)
if err := http.ListenAndServe(":"+port, router); err != nil {
log.Fatal("server listen fail %+v", err)
}
}
func StringToInt(sThread string) int {
thread := runtime.NumCPU()
if threadInt, err := strconv.Atoi(sThread); err == nil {
thread = threadInt
}
return thread
}
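For local development, the extender above is configured entirely through environment variables read in `main()` and `initKubeClient()`: `KUBECONFIG` for out-of-cluster access, `LOG_LEVEL`, `PORT` (falls back to 39999 when unset or non-numeric), and `THREADNESS` (defaults to the number of CPUs). A minimal local run might look like the following sketch (paths are assumptions):

```bash
# Sketch: build and run the scheduler extender locally against the current kubeconfig.
make build-server                        # produces bin/gpushare-sche-extender
export KUBECONFIG=$HOME/.kube/config     # read by initKubeClient for out-of-cluster access
export LOG_LEVEL=debug                   # one of: debug | info | warn | error
export PORT=39999                        # server port; 39999 is also the fallback value
export THREADNESS=4                      # controller workers; defaults to runtime.NumCPU()
./bin/gpushare-sche-extender
```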

View File

@@ -0,0 +1,132 @@
# rbac.yaml
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- pods
verbs:
- update
- patch
- get
- list
- watch
- apiGroups:
- ""
resources:
- bindings
- pods/binding
verbs:
- create
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: gpushare-schd-extender
namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gpushare-schd-extender
subjects:
- kind: ServiceAccount
name: gpushare-schd-extender
namespace: kube-system
# deployment yaml
---
kind: Deployment
apiVersion: apps/v1
metadata:
name: gpushare-schd-extender
namespace: kube-system
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: gpushare
component: gpushare-schd-extender
template:
metadata:
labels:
app: gpushare
component: gpushare-schd-extender
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
hostNetwork: true
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/master
- effect: NoSchedule
operator: Exists
key: node.cloudprovider.kubernetes.io/uninitialized
nodeSelector:
node-role.kubernetes.io/master: ""
serviceAccount: gpushare-schd-extender
containers:
- name: gpushare-schd-extender
image: registry.cn-hangzhou.aliyuncs.com/offends/rainbond:gpushare-scheduler-extender
env:
- name: LOG_LEVEL
value: debug
- name: PORT
value: "12345"
# service.yaml
---
apiVersion: v1
kind: Service
metadata:
name: gpushare-schd-extender
namespace: kube-system
labels:
app: gpushare
component: gpushare-schd-extender
spec:
type: NodePort
ports:
- port: 12345
name: http
targetPort: 12345
nodePort: 32766
selector:
# select the gpushare-schd-extender pods
app: gpushare
component: gpushare-schd-extender

View File

@@ -0,0 +1,67 @@
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-scheduler
tier: control-plane
name: kube-scheduler
namespace: kube-system
spec:
containers:
- command:
- kube-scheduler
- --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
- --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
- --bind-address=127.0.0.1
- --kubeconfig=/etc/kubernetes/scheduler.conf
- --leader-elect=true
- --config=/etc/kubernetes/scheduler-policy-config.yaml
image: k8s.gcr.io/kube-scheduler:v1.23.3
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10259
scheme: HTTPS
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 15
name: kube-scheduler
resources:
requests:
cpu: 100m
startupProbe:
failureThreshold: 24
httpGet:
host: 127.0.0.1
path: /healthz
port: 10259
scheme: HTTPS
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 15
volumeMounts:
- mountPath: /etc/kubernetes/scheduler.conf
name: kubeconfig
readOnly: true
- mountPath: /etc/kubernetes/scheduler-policy-config.yaml
name: scheduler-policy-config
readOnly: true
hostNetwork: true
priorityClassName: system-node-critical
securityContext:
seccompProfile:
type: RuntimeDefault
volumes:
- hostPath:
path: /etc/kubernetes/scheduler.conf
type: FileOrCreate
name: kubeconfig
- hostPath:
path: /etc/kubernetes/scheduler-policy-config.yaml
type: FileOrCreate
name: scheduler-policy-config
status: {}

View File

@@ -0,0 +1,59 @@
apiVersion: v1
kind: Pod
metadata:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
creationTimestamp: null
labels:
component: kube-scheduler
tier: control-plane
name: kube-scheduler
namespace: kube-system
spec:
containers:
- command:
- kube-scheduler
- --address=127.0.0.1
- --kubeconfig=/etc/kubernetes/scheduler.conf
- --policy-config-file=/etc/kubernetes/scheduler-policy-config.json
- --leader-elect=true
image: registry.cn-hangzhou.aliyuncs.com/acs/kube-scheduler-amd64:v1.11.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10251
scheme: HTTP
initialDelaySeconds: 15
timeoutSeconds: 15
name: kube-scheduler
resources:
requests:
cpu: 100m
volumeMounts:
- mountPath: /etc/kubernetes/scheduler.conf
name: kubeconfig
readOnly: true
- mountPath: /etc/kubernetes/scheduler-policy-config.json
name: scheduler-policy-config
readOnly: true
- mountPath: /etc/localtime
name: localtime
readOnly: true
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
- hostPath:
path: /etc/kubernetes/scheduler.conf
type: FileOrCreate
name: kubeconfig
- hostPath:
path: /etc/kubernetes/scheduler-policy-config.json
type: FileOrCreate
name: scheduler-policy-config
- hostPath:
path: /etc/localtime
type: ""
name: localtime

View File

@@ -0,0 +1,20 @@
{
"kind": "Policy",
"apiVersion": "v1",
"extenders": [
{
"urlPrefix": "http://127.0.0.1:32766/gpushare-scheduler",
"filterVerb": "filter",
"bindVerb": "bind",
"enableHttps": false,
"nodeCacheCapable": true,
"managedResources": [
{
"name": "aliyun.com/gpu-mem",
"ignoredByScheduler": false
}
],
"ignorable": false
}
]
}

View File

@@ -0,0 +1,15 @@
---
apiVersion: kubescheduler.config.k8s.io/v1beta2
kind: KubeSchedulerConfiguration
clientConnection:
kubeconfig: /etc/kubernetes/scheduler.conf
extenders:
- urlPrefix: "http://127.0.0.1:32766/gpushare-scheduler"
filterVerb: filter
bindVerb: bind
enableHTTPS: false
nodeCacheCapable: true
managedResources:
- name: aliyun.com/gpu-mem
ignoredByScheduler: false
ignorable: false

Binary image file added (217 KiB; contents not shown).

Binary image file added (204 KiB; contents not shown).
View File

@@ -0,0 +1,61 @@
## Install GPU Sharing with helm charts in Alibaba Cloud Kubernetes Service
## Requirements:
* Kubernetes >= 1.11, kubectl >= 1.12
* We recommend [Alibaba Cloud Kubernetes Service](https://www.alibabacloud.com/product/kubernetes). This solution only works for dedicated Kubernetes clusters.
## Steps:
1. Run:
```
git clone https://github.com/AliyunContainerService/gpushare-scheduler-extender.git
cd gpushare-scheduler-extender/deployer/chart
helm install --name gpushare --namespace kube-system --set masterCount=3 gpushare-installer
```
2. Add the gpushare label to the nodes that require GPU sharing:
```bash
kubectl label node <target_node> gpushare=true
```
For example:
```bash
kubectl label no mynode gpushare=true
```
3. Install the kubectl extension, as described in steps 4 and 5.
4. Install kubectl 1.12 or above. You can download and install `kubectl` for Linux:
```bash
curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl
chmod +x ./kubectl
sudo mv ./kubectl /usr/bin/kubectl
```
5. Download and install the kubectl extension:
```bash
cd /usr/bin/
wget https://github.com/AliyunContainerService/gpushare-device-plugin/releases/download/v0.3.0/kubectl-inspect-gpushare
chmod u+x /usr/bin/kubectl-inspect-gpushare
```
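To verify the installation, the plugin can be invoked through kubectl (a sketch; the exact output columns depend on the plugin version):
```bash
# Summarize GPU memory allocation on gpushare nodes (sketch).
kubectl inspect gpushare
# Show per-GPU details for each node.
kubectl inspect gpushare -d
```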
6. Disable GPU sharing on a node by relabeling it:
```bash
kubectl label node <target_node> gpushare=false
```
For example:
```bash
kubectl label no mynode gpushare=false
```

View File

@@ -0,0 +1,21 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj

View File

@@ -0,0 +1,5 @@
apiVersion: v1
appVersion: "1.0"
description: A Helm chart for Kubernetes
name: gpushare-installer
version: 0.7.0

View File

@@ -0,0 +1,27 @@
### 0.1.0
* support gpushare deployment
### 0.2.0
* fix GPU exclusive scheduling not being recovered after removing gpushare
### 0.3.0
* support helm v3
### 0.4.0
* delete env kubeVersion
### 0.5.0
* change mount dir of host to /etc/kubernetes
### 0.6.0
* change statefulset to job
### 0.7.0
* Support unhealthy configmap

View File

@@ -0,0 +1,32 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "gpushare-installer.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "gpushare-installer.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "gpushare-installer.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}

View File

@@ -0,0 +1,43 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: device-plugin-evict-ds
namespace: kube-system
spec:
updateStrategy:
type: RollingUpdate
selector:
matchLabels:
component: gpushare-device-plugin
app: gpushare
name: device-plugin-evict-ds
template:
metadata:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
labels:
component: gpushare-device-plugin
app: gpushare
name: device-plugin-evict-ds
spec:
hostNetwork: true
nodeSelector:
gpushare: "true"
containers:
- image: "{{ .Values.images.evictor.image }}:{{ .Values.images.evictor.tag }}"
imagePullPolicy: {{ .Values.images.evictor.pullPolicy }}
command:
- bash
- /dp-evict/dp-evict-on-host.sh
name: gpushare
# Make this a Guaranteed pod, which will never be evicted because of the node's resource consumption.
securityContext:
privileged: true
volumeMounts:
- name: kube-dir
mountPath: /etc/kubernetes
volumes:
- hostPath:
path: /etc/kubernetes
type: Directory
name: kube-dir

View File

@@ -0,0 +1,52 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: device-plugin-recover-ds
namespace: kube-system
spec:
updateStrategy:
type: RollingUpdate
selector:
matchLabels:
component: gpushare-device-plugin
app: gpushare
name: device-plugin-recover-ds
template:
metadata:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
labels:
component: gpushare-device-plugin
app: gpushare
name: device-plugin-recover-ds
spec:
nodeSelector:
gpushare: "false"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: aliyun.accelerator/nvidia_count
operator: Exists
# nodeSelector:
# gpu-instance: "true"
hostNetwork: true
containers:
- image: "{{ .Values.images.recover.image }}:{{ .Values.images.recover.tag }}"
imagePullPolicy: {{ .Values.images.recover.pullPolicy }}
command:
- bash
- /dp-evict/dp-recover-on-host.sh
name: gpushare
# Make this a Guaranteed pod, which will never be evicted because of the node's resource consumption.
securityContext:
privileged: true
volumeMounts:
- name: kube-dir
mountPath: /etc/kubernetes
volumes:
- hostPath:
path: /etc/kubernetes
type: Directory
name: kube-dir

View File

@@ -0,0 +1,61 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: gpushare-device-plugin-ds
namespace: kube-system
spec:
selector:
matchLabels:
component: gpushare-device-plugin
app: gpushare
name: gpushare-device-plugin-ds
type: runtime
template:
metadata:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
labels:
component: gpushare-device-plugin
app: gpushare
name: gpushare-device-plugin-ds
type: runtime
spec:
serviceAccount: gpushare-device-plugin
hostNetwork: true
nodeSelector:
gpushare: "true"
containers:
- image: "{{ .Values.images.devicePlugin.image }}:{{ .Values.images.devicePlugin.tag }}"
imagePullPolicy: {{ .Values.images.devicePlugin.pullPolicy }}
name: gpushare
# Make this a Guaranteed pod, which will never be evicted because of the node's resource consumption.
command:
- gpushare-device-plugin-v2
- -logtostderr
- --v=5
- --memory-unit=GiB
resources:
limits:
memory: "300Mi"
cpu: "1"
requests:
memory: "300Mi"
cpu: "1"
env:
- name: KUBECONFIG
value: /etc/kubernetes/kubelet.conf
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins

View File

@@ -0,0 +1,59 @@
# rbac.yaml
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-device-plugin
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- pods
verbs:
- update
- patch
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- update
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: gpushare-device-plugin
namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-device-plugin
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gpushare-device-plugin
subjects:
- kind: ServiceAccount
name: gpushare-device-plugin
namespace: kube-system

View File

@@ -0,0 +1,45 @@
# deployment yaml
---
kind: Deployment
apiVersion: apps/v1
metadata:
name: gpushare-schd-extender
namespace: kube-system
spec:
selector:
matchLabels:
app: gpushare
component: gpushare-schd-extender
type: runtime
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: gpushare
component: gpushare-schd-extender
type: runtime
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
hostNetwork: true
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/master
- effect: NoSchedule
operator: Exists
key: node.cloudprovider.kubernetes.io/uninitialized
nodeSelector:
node-role.kubernetes.io/master: ""
serviceAccount: gpushare-schd-extender
containers:
- name: gpushare-schd-extender
image: "{{ .Values.images.extender.image }}:{{ .Values.images.extender.tag }}"
imagePullPolicy: {{ .Values.images.extender.pullPolicy }}
env:
- name: LOG_LEVEL
value: debug
- name: PORT
value: "12345"

View File

@@ -0,0 +1,67 @@
# rbac.yaml
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- pods
verbs:
- update
- patch
- get
- list
- watch
- apiGroups:
- ""
resources:
- bindings
- pods/binding
verbs:
- create
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- watch
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: gpushare-schd-extender
namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpushare-schd-extender
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gpushare-schd-extender
subjects:
- kind: ServiceAccount
name: gpushare-schd-extender
namespace: kube-system

View File

@@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
name: gpushare-schd-extender
namespace: kube-system
labels:
app: gpushare
component: gpushare-schd-extender
spec:
# type: ClusterIP
type: NodePort
ports:
- port: 12345
name: http
targetPort: 12345
nodePort: 32766
selector:
app: gpushare
component: gpushare-schd-extender

View File

@@ -0,0 +1,66 @@
apiVersion: batch/v1
kind: Job
metadata:
name: gpushare-installer
namespace: kube-system
labels:
app: gpushare
name: gpushare-installer
chart: {{ template "gpushare-installer.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
spec:
parallelism: {{ .Values.masterCount }}
template:
metadata:
labels:
chart: {{ template "gpushare-installer.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
app: gpushare
name: gpushare-installer
spec:
hostNetwork: true
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/master
- effect: NoSchedule
operator: Exists
key: node.cloudprovider.kubernetes.io/uninitialized
nodeSelector:
node-role.kubernetes.io/master: ""
restartPolicy: OnFailure
containers:
- name: deploy-schd
image: "{{ .Values.images.installer.image }}:{{ .Values.images.installer.tag }}"
imagePullPolicy: {{ .Values.images.installer.pullPolicy }}
securityContext:
privileged: true
command:
- bash
- /schd-extender/install-sched-extender-on-host.sh
env:
- name: NODE_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
volumeMounts:
- name: kube-dir
mountPath: /etc/kubernetes
volumes:
- hostPath:
path: /etc/kubernetes
type: Directory
name: kube-dir
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: name
operator: In
values:
- gpushare-installer
topologyKey: "kubernetes.io/hostname"

View File

@@ -0,0 +1,29 @@
# Default values for gpushare-installer.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
masterCount: 3
images:
extender:
image: "registry.cn-beijing.aliyuncs.com/acs/k8s-gpushare-schd-extender"
tag: v1.0.0-ce6f800-aliyun
pullPolicy: IfNotPresent
installer:
image: "registry.cn-beijing.aliyuncs.com/acs/schd-extender-deployer"
tag: v1.0.0-b56d26d-aliyun
pullPolicy: IfNotPresent
devicePlugin:
image: "registry.cn-beijing.aliyuncs.com/acs/k8s-gpushare-plugin"
tag: v1.0.0-2656995-aliyun
pullPolicy: IfNotPresent
evictor:
image: "registry.cn-beijing.aliyuncs.com/acs/nvidia-device-plugin-evict"
tag: v1.0.0-b56d26d-aliyun
pullPolicy: IfNotPresent
recover:
image: "registry.cn-beijing.aliyuncs.com/acs/nvidia-device-plugin-recover"
tag: v1.0.0-b56d26d-aliyun
pullPolicy: IfNotPresent

View File

@@ -0,0 +1,12 @@
FROM debian:bullseye-slim
RUN echo \
deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib\
> /etc/apt/sources.list
RUN apt-get update
RUN apt-get install -y curl tzdata iproute2 bash && \
rm -rf /var/cache/apt/* && \
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
echo "Asia/Shanghai" > /etc/timezone && \
mkdir -p /dp-evict
ADD dp-evict /dp-evict
RUN chmod -R +x /dp-evict

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
cd $BASEDIR
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build --network=host -t registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-evict:$TIMESTAMP .
docker push registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-evict:$TIMESTAMP
echo registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-evict:$TIMESTAMP

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -e -x
backup_dir="/etc/kubernetes/manifests-backup"
public::common::log() {
echo $(date +"[%Y%m%d %H:%M:%S]: ") $1
}
public::evict::gpu-device-plugin() {
dir=/etc/kubernetes/manifests/
if [ -f /etc/kubernetes/manifests/nvidia-device-plugin.yml ]; then
backup_dir="/etc/kubernetes/manifests-backup/"
mkdir -p $backup_dir
mv /etc/kubernetes/manifests/nvidia-device-plugin.yml $backup_dir
else
public::common::log "Skip removing nvidia-device-plugin.yml, because it doesn't exist."
fi
}
main() {
public::evict::gpu-device-plugin
touch /ready
while sleep 3600; do :; done
}
main "$@"

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -xe
if [ -d "/k8s-host" ]; then
rm -rf /k8s-host/usr/local/dp-evict
mkdir -p /k8s-host/usr/local/dp-evict
cp -r /dp-evict/* /k8s-host/usr/local/dp-evict
chmod -R +x /k8s-host/usr/local/dp-evict
chroot /k8s-host /usr/local/dp-evict/dp-evict-on-host.sh "$@"
while sleep 3600; do :; done
fi

View File

@@ -0,0 +1,12 @@
FROM debian:bullseye-slim
RUN echo \
deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib\
> /etc/apt/sources.list
RUN apt-get update
RUN apt-get install -y curl tzdata iproute2 bash && \
rm -rf /var/cache/apt/* && \
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
echo "Asia/Shanghai" > /etc/timezone && \
mkdir -p /dp-evict
ADD dp-evict /dp-evict
RUN chmod -R +x /dp-evict

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
cd $BASEDIR
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build --network=host -t registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-recover:$TIMESTAMP .
docker push registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-recover:$TIMESTAMP
echo registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-recover:$TIMESTAMP

View File

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -e -x
dir="/etc/kubernetes/manifests"
backup_dir="/etc/kubernetes/manifests-backup"
public::common::log() {
echo $(date +"[%Y%m%d %H:%M:%S]: ") $1
}
public::recover::gpu-device-plugin() {
if [ -f $dir/nvidia-device-plugin.yml ]; then
public::common::log "Skip recovering nvidia-device-plugin.yml, because it already exist."
else
if [ -f $backup_dir/nvidia-device-plugin.yml ]; then
cp -f $backup_dir/nvidia-device-plugin.yml $dir/nvidia-device-plugin.yml
public::common::log "Finish recovering nvidia-device-plugin.yml."
else
public::common::log "No nvidia-device-plugin.yml to recover."
fi
fi
}
main() {
public::recover::gpu-device-plugin
touch /ready
while sleep 3600; do :; done
}
main "$@"

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -xe
if [ -d "/k8s-host" ]; then
rm -rf /k8s-host/usr/local/dp-evict
mkdir -p /k8s-host/usr/local/dp-evict
cp -r /dp-evict/* /k8s-host/usr/local/dp-evict
chmod -R +x /k8s-host/usr/local/dp-evict
chroot /k8s-host /usr/local/dp-evict/dp-recover-on-host.sh "$@"
while sleep 3600; do :; done
fi

View File

@@ -0,0 +1,14 @@
FROM debian:bullseye-slim
RUN echo \
deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib\
> /etc/apt/sources.list
RUN apt-get update
RUN apt-get install -y curl tzdata iproute2 bash && \
rm -rf /var/cache/apt/* && \
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
echo "Asia/Shanghai" > /etc/timezone && \
mkdir -p /schd-extender
ADD schd-extender /schd-extender
RUN chmod -R +x /schd-extender

View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
cd $BASEDIR
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build -t registry.cn-hangzhou.aliyuncs.com/acs/schd-extender-deployer:$TIMESTAMP .
docker tag registry.cn-hangzhou.aliyuncs.com/acs/schd-extender-deployer:$TIMESTAMP cheyang/schd-extender-deployer:$TIMESTAMP
docker push registry.cn-hangzhou.aliyuncs.com/acs/schd-extender-deployer:$TIMESTAMP

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env bash
set -e -x
dir=/etc/kubernetes/manifests
backup_dir="/etc/kubernetes/manifests-backup"
TIMESTAMP=$(date +%Y%m%d%H%M%S)
public::common::log() {
echo $(date +"[%Y%m%d %H:%M:%S]: ") $1
}
public::deployer::sche-policy-config() {
mkdir -p $backup_dir
if [ ! -f $backup_dir/kube-scheduler.ori.yaml ];then
cp /etc/kubernetes/manifests/kube-scheduler.yaml $backup_dir/kube-scheduler.ori.yaml
public::common::log "Backup $backup_dir/kube-scheduler.ori.yaml"
else
cp /etc/kubernetes/manifests/kube-scheduler.yaml $backup_dir/kube-scheduler-$TIMESTAMP.yaml
public::common::log "Backup $backup_dir/kube-scheduler-$TIMESTAMP.yaml"
fi
if [ ! -f $backup_dir/scheduler-policy-config.ori.json ];then
if [ -f /etc/kubernetes/scheduler-policy-config.json ];then
cp /etc/kubernetes/scheduler-policy-config.json $backup_dir/scheduler-policy-config.ori.json
public::common::log "Backup $backup_dir/scheduler-policy-config.ori.json"
fi
else
if [ -f /etc/kubernetes/scheduler-policy-config.json ];then
cp /etc/kubernetes/scheduler-policy-config.json $backup_dir/scheduler-policy-config-$TIMESTAMP.json
public::common::log "Backup $backup_dir/scheduler-policy-config-$TIMESTAMP.json"
fi
fi
public::common::log "Configure shceduler extender"
cp -f /schd-extender/scheduler-policy-config.json /etc/kubernetes/scheduler-policy-config.json
sed -i 's/127.0.0.1/'"${NODE_IP}"'/g' /etc/kubernetes/scheduler-policy-config.json
if ! grep 'deployment.kubernetes.io/revision' $dir/kube-scheduler.yaml; then
sed -i '/scheduler.alpha.kubernetes.io\/critical-pod/a \ deployment.kubernetes.io/revision: "'"${TIMESTAMP}"'"' $dir/kube-scheduler.yaml
else
# sed -i '/deployment.kubernetes.io\/revision/d' $dir/kube-scheduler.yaml
sed -i 's#deployment.kubernetes.io/revision:.*#deployment.kubernetes.io/revision: "'"${TIMESTAMP}"'"#' $dir/kube-scheduler.yaml
fi
if ! grep 'policy-config-file=/etc/kubernetes/scheduler-policy-config.json' $dir/kube-scheduler.yaml; then
sed -i "/- kube-scheduler/a\ \ \ \ - --policy-config-file=/etc/kubernetes/scheduler-policy-config.json" $dir/kube-scheduler.yaml
else
public::common::log "Skip the kube-scheduler config, because it's already configured extender."
fi
# add scheduler config policy volumeMounts
if ! grep 'mountPath: /etc/kubernetes/scheduler-policy-config.json' $dir/kube-scheduler.yaml; then
sed -i "/ volumeMounts:/a\ \ \ \ - mountPath: /etc/kubernetes/scheduler-policy-config.json\n name: scheduler-policy-config\n readOnly: true" $dir/kube-scheduler.yaml
else
public::common::log "Skip the scheduler-policy-config mountPath, because it's already configured extender."
fi
# add scheduler config policy volumes
if ! grep 'path: /etc/kubernetes/scheduler-policy-config.json' $dir/kube-scheduler.yaml; then
sed -i "/ volumes:/a \ - hostPath:\n path: /etc/kubernetes/scheduler-policy-config.json\n type: FileOrCreate\n name: scheduler-policy-config" $dir/kube-scheduler.yaml
else
public::common::log "Skip the scheduler-policy-config volumes, because it's already configured extender."
fi
}
main() {
public::deployer::sche-policy-config
touch /ready
#while sleep 3600; do :; done
}
main

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -xe
if [ -d "/k8s-host" ]; then
rm -rf /k8s-host/usr/local/k8s-schd-extender
mkdir -p /k8s-host/usr/local/k8s-schd-extender
cp -r /schd-extender/* /k8s-host/usr/local/k8s-schd-extender
chmod -R +x /k8s-host/usr/local/k8s-schd-extender/
chroot /k8s-host /usr/local/k8s-schd-extender/install-sched-extender-on-host.sh
while sleep 3600; do :; done
fi

View File

@@ -0,0 +1,20 @@
{
  "kind": "Policy",
  "apiVersion": "v1",
  "extenders": [
    {
      "urlPrefix": "http://127.0.0.1:32766/gpushare-scheduler",
      "filterVerb": "filter",
      "bindVerb": "bind",
      "enableHttps": false,
      "nodeCacheCapable": true,
      "managedResources": [
        {
          "name": "aliyun.com/gpu-mem",
          "ignoredByScheduler": false
        }
      ],
      "ignorable": false
    }
  ]
}

View File

@@ -0,0 +1,30 @@
# Adopters Of GPUShare Scheduler
Below are the adopters of the GPUShare Scheduler project. If you are using GPUShare Scheduler to improve GPU utilization in Kubernetes, please feel free to add yourself to the list below by opening a pull request. There are several adoption phases, as follows:
* **Evaluation:** Aware of GPUShare Scheduler and finding it interesting; evaluating its features and scope
* **Testing:** Considering GPUShare Scheduler as a candidate; testing a Kubernetes cluster with GPUShare Scheduler
* **Staging:** Decided to use GPUShare Scheduler; testing it in a pre-production environment
* **Production:** Already running GPUShare Scheduler in a production environment
| Organization | Contact | Phases | Description of Use |
| ------------ | ------- | ----------- | ------------------ |
| [JianPei](http://www.jianpeicn.com/) | [@lisongtao716](https://github.com/lisongtao716) | Testing | Medical image analysis |
| [Unisound](https://www.unisound.com/) | [@xieydd](https://github.com/xieydd) | Testing | Unisound ATLAS AI Training Platform |
| [Bytedance](https://www.bytedance.com) | [@ryzzn](https://github.com/ryzzn) | Testing | Render Platform |
| [TIANCHI](https://tianchi.aliyun.com) | [@gaoxiaos](https://github.com/gaoxiaos) | Staging | AI Competition Platform|
| [TAL AI](https://ai.100tal.com) | [@asas12350](https://github.com/asas12350) | **Production** | AI Inference Service Platform|
| [HuyaTech](https://www.huya.com) | [@BobLiu20](https://github.com/BobLiu20) | **Production** | HUYA AI Platform |
| [QTT BigData](http://www.qutoutiao.net/) | [@OopsOutOfMemory](https://github.com/OopsOutOfMemory) | **Production** | QTT AI Platform |
| [Taobao](http://www.taobao.com) | [@zxthunter](https://github.com/zxthunter) | **Production** | NU Algorithm Deployment Platform |
| [Heuritech](http://www.heuritech.com) | [@heuritech](https://github.com/heuritech) | **Production** | AI Inference for Fashion |
| [AliyunIoT](https://iot.aliyun.com/) | [@falltodis](https://github.com/falltodis) | **Production** | IoT Edge AI Platform |
| [Jiangsu Telecom](https://wapjs.189.cn/) | [@yangyuliufeng](https://github.com/yangyuliufeng) | **Production** | AI Platform on k8s |
| [Aliyun Industry Brain](https://et.aliyun.com/brain/industry) | [@xlk23](https://github.com/xlk23) | **Production** | EPIC Platform |
| [Weibo](https://www.weibo.com) | [@phoenixwu0229](https://github.com/phoenixwu0229) | **Production** | Weibo ML Platform |
| [Zuo Ye Bang](http://www.zuoyebang.com) | [@xkos](https://github.com/xkos) | **Production** | AI Platform on k8s |
| [Hellobike](https://www.helloglobal.com) | [@gwl-wolf](https://github.com/gwl-wolf) | **Production** | AIBrain Platform |
| [Gomo](https://www.gomo.com) | [@cxxx](https://github.com/cxxx) | **Production** | Image conversion |
| [Qihoo 360](https://www.360.cn) | [@70data](https://github.com/70data) | **Production** | Private Cloud Platform on K8s |
| [DIDI](https://www.didiglobal.com/) | [@tongchao199](https://github.com/tongchao199) | **Production** | AI Experimental Environment Service <br> AI Inference Service |
| [Mango TV](https://www.mgtv.com) | [@ftx0day](https://github.com/ftx0day) | **Production** | Mango CloudNative AI Platform |

Binary file not shown. (Size: 229 KiB)

Binary file not shown. (Size: 183 KiB)

Binary file not shown. (Size: 217 KiB)

Binary file not shown. (Size: 204 KiB)

View File

@@ -0,0 +1,105 @@
# GPU Sharing in Kubernetes
## Background
The Kubernetes infrastructure enforces exclusive GPU usage, preventing GPUs from being shared across pods. This is not good for users who want to use the sharing capabilities of NVIDIA GPUs to increase GPU utilization in a cluster.
Exclusive usage does achieve better isolation, ensuring that the GPU usage of each application is not affected by other applications; it is very suitable for deep learning model training scenarios, but it is usually wasteful when the scenario is model development or model inference. In general, when we talk about shared GPU support at the cluster level, we usually think about two concepts:
1. Isolation: this is the basis for sharing a device, covering fault isolation, memory isolation, and real parallelism of the shared resource in each container at the runtime level. It is inherently defined by the hardware device and the software controlling that device on the node, such as MPS (Multi-Process Service). In fact, Kubernetes helps little here.
2. Scheduling: Kubernetes should help the user express how devices should be shared, and follow the user's specification to guarantee that devices cannot be oversubscribed at the scheduling level. However, Kubernetes cannot in any measure enforce that at the runtime level.
For fine-grained GPU device scheduling, there is currently no good solution. This is because extended resources such as GPU and RDMA in Kubernetes are restricted to whole-number quantities and cannot express the allocation of fractional or complex resources. For example, it's impossible for a user to ask for 0.5 GPU in a Kubernetes cluster. The essential problem here is that multi-device GPU sharing is a vector resource problem, while extended resources are descriptions of scalar resources.
## User Story
- As a cluster administrator, I want to increase the GPU usage of the cluster; during the development process, multiple users share the same model development environment in the same GPU.
- As an application operator, I want to be able to run multiple inference tasks on the same GPU at the same time.
## Goals
- Allow users to express requests for sharing a resource, and guarantee that the GPU cannot be oversubscribed at the scheduling level.
## Non Goals
- Isolation of this shared resource
- Oversubscription
## Design Principles
- Although there are two ways to measure GPU capabilities (CUDA cores and GPU Memory), in the inference scenarios, we can make the assumption that the number of CUDA cores and GPU Memory are proportional.
- Leverage Extended Resources to express device sharing requests by changing the measure unit from "number of GPUs" to "amount of GPU memory in MiB". If the GPU used by the node is a single device with 16GiB of memory, it can be expressed as 16276MiB.
- The user's need for shared GPUs is in the model development and inference scenarios. In these cases, the GPU resource requested by the user does not exceed one GPU; that is, the resource limit of the application is a single GPU.
- Do not change any Kubernetes barebone code, just leverage extended resource, scheduler extender and device plugin mechanism.
## Design
Define two new Extended Resources: the first is gpu-mem, which corresponds to GPU memory; the second is gpu-count, which corresponds to the number of GPU devices.
The diagram below describes the architecture:
![](arch.jpg)
### Core components
- **GPU Share Scheduler Extender**: It uses the Kubernetes scheduler extender mechanism. It is responsible for determining whether a single GPU device on the node can provide enough GPU memory when the global scheduler runs Filter and Bind, and it records the GPU allocation result in the Pod Spec Annotation at Bind time for subsequent use.
- **GPU Share Device Plugin**: It uses the Device Plugin mechanism. It is responsible for allocating the GPU device according to the decision of the GPU Share Scheduler Extender recorded in the Pod Spec Annotation.
### Process
#### 1\. Device Resource Report
The GPU Share Device Plugin uses the nvml library to query the number of GPU devices and the GPU memory of each device. The total GPU memory of the node (quantity * per-device memory) is reported to the Kubelet by `ListAndWatch()`, and the Kubelet in turn reports it to the Kubernetes API Server.
If the node has 2 GPUs and each GPU has 16276 MiB, the GPU memory of the node is 16276 * 2 = 32552 MiB. In addition, the number of GPU devices on the node is also reported as another Extended Resource.
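To make the arithmetic concrete, here is a minimal Go sketch (illustrative only, not the device plugin's actual code) of the two quantities such a node would advertise:
```go
package main

import "fmt"

func main() {
	// Illustrative values from the example above; the real plugin discovers
	// these via nvml rather than hard-coding them.
	const gpuCount = 2         // number of GPU devices on the node
	const memPerGPUMiB = 16276 // memory of each GPU in MiB

	gpuMem := gpuCount * memPerGPUMiB
	fmt.Println("aliyun.com/gpu-mem:", gpuMem)     // 32552
	fmt.Println("aliyun.com/gpu-count:", gpuCount) // 2
}
```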
#### 2\. Schedule
The GPU Share Scheduler Extender records the allocation information in annotations, and determines whether each GPU has enough gpu-mem according to this information when the scheduler is doing the filtering.
2.1. After the Kubernetes scheduler finishes all the default filters, it calls the filter method of the GPU Share Scheduler Extender over HTTP. This is because the default scheduler, when calculating an extended resource, can only determine whether the node's total amount of free resources meets the demand; it cannot determine whether the demand can be met on a single device. Therefore, the GPU Share Scheduler Extender must check whether a single device has enough available resources.
The following figure shows an example. There are 3 nodes with 2 GPU devices each in a Kubernetes cluster. When the user applies for `gpu-mem=8138`, the default scheduler scans all the nodes and finds that the remaining resources of N1 are 16276 * 2 - 16276 - 12207 = 4069, so the resource requirement is not met and the N1 node is filtered out.
The remaining resources of the N2 and N3 nodes are both 8138 MiB, which satisfies the default scheduler. At this point, the default scheduler delegates to the GPU Share Scheduler Extender for secondary filtering.
During the secondary filtering, the GPU Share Scheduler Extender needs to determine whether a single GPU device meets the resource requirement. When checking the N2 node, it finds that although the node has 8138 MiB of available resources, they are spread across two devices: GPU0 and GPU1 each have only 4069 MiB available, which cannot meet the requirement of 8138 MiB on a single device.
Although the N3 node also has a total of 8138 MiB of available resources, these resources all belong to GPU0, which satisfies the single-device requirement. Thus, accurate scheduling is achieved through the GPU Share Scheduler Extender's filtering.
![](filter.jpg)
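As a rough sketch of the per-device check behind this secondary filtering (simplified Go with assumed names, not the extender's actual code):
```go
package main

import "fmt"

// nodeFits reports whether any single GPU on the node has enough free
// memory (in MiB) to satisfy the request.
func nodeFits(freeByGPU []int, requestMiB int) bool {
	for _, free := range freeByGPU {
		if free >= requestMiB {
			return true
		}
	}
	return false
}

func main() {
	request := 8138
	fmt.Println(nodeFits([]int{4069, 4069}, request)) // N2: false, memory is spread across two GPUs
	fmt.Println(nodeFits([]int{8138, 0}, request))    // N3: true, all free memory is on GPU0
}
```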
2.2. When the scheduler finds a node that satisfies the requirements, it delegates to the GPU Share Scheduler Extender to bind the node and the pod. Here, the extender needs to do two things:
- Find a GPU device on the node according to the binpack rule, record the GPU device ID and save it as `ALIYUN_GPU_ID` in the annotations of the pod. It also saves the GPU memory requested by the pod as `ALIYUN_COM_GPU_MEM_POD`, and the assume timestamp as `ALIYUN_COM_GPU_MEM_ASSUME_TIME`, in the annotations of the pod. If no suitable GPU is found at binding time, no binding is performed; the default scheduler will reschedule after the expiration timeout.
> Notice: There is also a pod annotation named `ALIYUN_COM_GPU_MEM_ASSIGNED`, initialized as `false`. It indicates that the pod has a GPU device assumed for it during scheduling, but the device has not yet been assigned at runtime.
- Bind the pod to the node through the Kubernetes API
For example, a user requests a pod with gpu-mem: 8138 and the node N1 is selected. The available resources of the different GPUs are analyzed first, namely GPU0 (12207), GPU1 (8138), GPU2 (4069) and GPU3 (16276). GPU2 (4069) does not satisfy the request and is discarded; among the other three GPUs that do satisfy it, GPU1 (8138), which has the least remaining resources, is selected.
![](bind.jpg)
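A minimal Go sketch of this binpack selection, reusing the numbers from the example above (illustrative only, not the extender's actual implementation):
```go
package main

import "fmt"

// pickGPU applies the binpack rule used at bind time: among the GPUs whose
// free memory (in MiB) satisfies the request, choose the one with the least
// remaining memory. It returns -1 when no single GPU fits.
func pickGPU(freeByGPU []int, requestMiB int) int {
	best := -1
	for id, free := range freeByGPU {
		if free < requestMiB {
			continue
		}
		if best == -1 || free < freeByGPU[best] {
			best = id
		}
	}
	return best
}

func main() {
	// Available memory of GPU0..GPU3 on node N1 in the example above.
	free := []int{12207, 8138, 4069, 16276}
	fmt.Println(pickGPU(free, 8138)) // 1 -> GPU1 is selected
}
```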
#### 3\. Run the deployment on the node
An `Allocate` function in GPU Share Device Plugin is called from Kubelet before creating the container (the parameter of `Allocate` is the GPU memory request amount):
3.1 Get, from the Kubernetes API Server, all the pending GPU-share pods on this node together with their GPU memory request amounts, ordered by assumedTimestamp
3.2 Choose the pod whose GPU memory request matches the amount given in the parameter of the `Allocate` function. There may be several pods with the same GPU memory request amount; if so, the pod with the earliest assumedTimestamp is chosen.
3.3 Mark the chosen pod's annotation `ALIYUN_COM_GPU_MEM_ASSIGNED` as `true`, indicating that the GPU device has been assigned to the container at runtime.
![](sequence.jpg)
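A simplified Go sketch of the pod selection in steps 3.1-3.3 (assumed types and field names, not the device plugin's actual code):
```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// pendingPod is a simplified view of a pending GPU-share pod on this node.
type pendingPod struct {
	name        string
	requestMiB  int
	assumedTime time.Time // parsed from ALIYUN_COM_GPU_MEM_ASSUME_TIME
}

// choosePod picks the pod whose GPU memory request matches the amount passed
// to Allocate, preferring the earliest assumed timestamp when several match.
func choosePod(pods []pendingPod, requestMiB int) *pendingPod {
	var matches []pendingPod
	for _, p := range pods {
		if p.requestMiB == requestMiB {
			matches = append(matches, p)
		}
	}
	if len(matches) == 0 {
		return nil
	}
	sort.Slice(matches, func(i, j int) bool {
		return matches[i].assumedTime.Before(matches[j].assumedTime)
	})
	return &matches[0]
}

func main() {
	now := time.Now()
	pods := []pendingPod{
		{name: "binpack-1-0", requestMiB: 8138, assumedTime: now},
		{name: "binpack-1-1", requestMiB: 8138, assumedTime: now.Add(-time.Minute)},
	}
	fmt.Println(choosePod(pods, 8138).name) // binpack-1-1, the earlier assumed pod
}
```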

Binary file not shown. (Size: 209 KiB)

Binary file not shown. (Size: 209 KiB)

View File

@@ -0,0 +1,138 @@
# Installation guide
## 0\. Prepare GPU Node
This guide assumes that the NVIDIA drivers and nvidia-docker2 have been installed.
Enable the Nvidia runtime as your default runtime on your node. To do this, please edit the docker daemon config file which is usually present at /etc/docker/daemon.json:
```json
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
```
> *if `runtimes` is not already present, head to the install page of [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)*
## 1\. Deploy GPU share scheduler extender in control plane
```bash
kubectl create -f https://raw.githubusercontent.com/AliyunContainerService/gpushare-scheduler-extender/master/config/gpushare-schd-extender.yaml
```
## 2\. Modify scheduler configuration
The goal is to include `scheduler-policy-config.json` into the scheduler configuration (`/etc/kubernetes/manifests/kube-scheduler.yaml`).
> Notice: If your Kubernetes default scheduler is deployed as a static pod, don't edit the yaml file inside `/etc/kubernetes/manifests` directly. Edit a copy of the yaml file outside the `/etc/kubernetes/manifests` directory, then copy the edited file into `/etc/kubernetes/manifests/`; Kubernetes will update the default static pod from the yaml file automatically.
### 2.1 Kubernetes v1.23+
From Kubernetes v1.23, [scheduling policies are no longer supported](https://kubernetes.io/docs/reference/scheduling/policies/); instead, [scheduler configurations](https://kubernetes.io/docs/reference/scheduling/config/) should be used.
That means `scheduler-policy-config.yaml` needs to be included in the scheduler config (`/etc/kubernetes/manifests/kube-scheduler.yaml`).
Here is the sample of the final modified [kube-scheduler.yaml](../config/kube-scheduler-v1.23+.yaml)
#### 2.1.1 Copy scheduler config file into /etc/kubernetes
```bash
cd /etc/kubernetes
curl -O https://raw.githubusercontent.com/AliyunContainerService/gpushare-scheduler-extender/master/config/scheduler-policy-config.yaml
```
#### 2.1.2 Add Policy config file parameter in scheduler arguments
```yaml
- --config=/etc/kubernetes/scheduler-policy-config.yaml
```
#### 2.1.3 Add volume mount into Pod Spec
```yaml
- mountPath: /etc/kubernetes/scheduler-policy-config.yaml
  name: scheduler-policy-config
  readOnly: true
```
```yaml
- hostPath:
    path: /etc/kubernetes/scheduler-policy-config.yaml
    type: FileOrCreate
  name: scheduler-policy-config
```
### 2.2 Before Kubernetes v1.23
Here is the sample of the final modified [kube-scheduler.yaml](../config/kube-scheduler.yaml)
#### 2.2.1 Copy scheduler config file into /etc/kubernetes
```bash
cd /etc/kubernetes
curl -O https://raw.githubusercontent.com/AliyunContainerService/gpushare-scheduler-extender/master/config/scheduler-policy-config.json
```
#### 2.2.2 Add Policy config file parameter in scheduler arguments
```yaml
- --policy-config-file=/etc/kubernetes/scheduler-policy-config.json
```
#### 2.2.3 Add volume mount into Pod Spec
```yaml
- mountPath: /etc/kubernetes/scheduler-policy-config.json
  name: scheduler-policy-config
  readOnly: true
```
```yaml
- hostPath:
    path: /etc/kubernetes/scheduler-policy-config.json
    type: FileOrCreate
  name: scheduler-policy-config
```
## 3\. Deploy Device Plugin
```bash
kubectl create -f https://raw.githubusercontent.com/AliyunContainerService/gpushare-device-plugin/master/device-plugin-rbac.yaml
kubectl create -f https://raw.githubusercontent.com/AliyunContainerService/gpushare-device-plugin/master/device-plugin-ds.yaml
```
> Notice: please remove default GPU device plugin, for example, if you are using [nvidia-device-plugin](https://github.com/NVIDIA/k8s-device-plugin/blob/v1.11/nvidia-device-plugin.yml), you can run `kubectl delete ds -n kube-system nvidia-device-plugin-daemonset` to delete.
## 4\. Add gpushare node labels to the nodes requiring GPU sharing
You need to add the label `gpushare=true` to all nodes where you want to install the device plugin, because the device plugin is a DaemonSet.
```bash
kubectl label node <target_node> gpushare=true
```
For example:
```bash
kubectl label node mynode gpushare=true
```
## 5\. Install Kubectl extension
### 5.1 Install kubectl 1.12 or above
You can download and install `kubectl` for linux
```bash
curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl
chmod +x ./kubectl
sudo mv ./kubectl /usr/bin/kubectl
```
### 5.2 Download and install the kubectl extension
```bash
cd /usr/bin/
wget https://github.com/AliyunContainerService/gpushare-device-plugin/releases/download/v0.3.0/kubectl-inspect-gpushare
chmod u+x /usr/bin/kubectl-inspect-gpushare
```

View File

@@ -0,0 +1,7 @@
## Problem Determination
1. If you cannot find the gpushare node through `kubectl inspect gpushare`:
1.1 kubectl get po -n kube-system -o=wide | grep gpushare-device
1.2 kubectl logs -n kube-system <pod_name>

View File

@@ -0,0 +1,79 @@
# User Guide
> Notice: Kubernetes provides GPU sharing scheduling capability, which is only a scheduling mechanism. It
guarantees that devices cannot be "oversubscribed" at the scheduling level, but it cannot in any
measure enforce that at the runtime level. For now, you have to take care of isolation by yourself.
1. Query the allocation status of the shared GPU
```bash
# kubectl inspect gpushare
NAME IPADDRESS GPU0(Allocated/Total) GPU Memory(GiB)
cn-shanghai.i-uf61h64dz1tmlob9hmtb 192.168.0.71 6/15 6/15
cn-shanghai.i-uf61h64dz1tmlob9hmtc 192.168.0.70 3/15 3/15
------------------------------------------------------------------------------
Allocated/Total GPU Memory In Cluster:
9/30 (30%)
```
> For more details, please run `kubectl inspect gpushare -d`
2. To request GPU sharing, you just need to specify `aliyun.com/gpu-mem`
```yaml
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: binpack-1
  labels:
    app: binpack-1
spec:
  replicas: 3
  serviceName: "binpack-1"
  podManagementPolicy: "Parallel"
  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-1
  template: # define the pods specifications
    metadata:
      labels:
        app: binpack-1
    spec:
      containers:
      - name: binpack-1
        image: cheyang/gpu-player:v2
        resources:
          limits:
            # GiB
            aliyun.com/gpu-mem: 3
```
> Notice that `aliyun.com/gpu-mem` is expressed in GiB; with the 15 GiB GPUs shown in the example above, requesting 3 GiB corresponds to one fifth of a GPU.
3\. Using the following environment variables, the application can limit its GPU usage via the CUDA API or a framework API such as TensorFlow
```bash
# The total amount of GPU memory on the current device (GiB)
ALIYUN_COM_GPU_MEM_DEV=15
# The GPU Memory of the container (GiB)
ALIYUN_COM_GPU_MEM_CONTAINER=3
```
Limit GPU memory by setting fraction through TensorFlow API
```python
import tensorflow as tf

# 3 GiB for this container out of 15 GiB on the device, scaled by 0.7 (see note below)
fraction = round(3 * 0.7 / 15, 1)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = fraction
sess = tf.Session(config=config)
c = tf.constant(1.0)  # placeholder op; replace with the real computation graph
# Runs the op.
while True:
    sess.run(c)
```
> The 0.7 factor is used because TensorFlow's control of GPU memory is not exact; multiplying by 0.7 is recommended to make sure the upper limit is not exceeded.

View File

@@ -0,0 +1,58 @@
module github.com/AliyunContainerService/gpushare-scheduler-extender
go 1.19
replace k8s.io/api => k8s.io/api v0.0.0-20221108053748-98c1aa6b3d0a
require (
github.com/julienschmidt/httprouter v1.3.0
go.uber.org/zap v1.23.0
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8
k8s.io/api v0.25.4
k8s.io/apimachinery v0.25.4
k8s.io/client-go v0.25.4
k8s.io/kube-scheduler v0.25.4
)
require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.8.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.5 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
golang.org/x/net v0.1.1-0.20221027164007-c63010009c80 // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/term v0.1.0 // indirect
golang.org/x/text v0.4.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.80.1 // indirect
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 // indirect
k8s.io/utils v0.0.0-20221107191617-1a15be271d1d // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

View File

@@ -0,0 +1,495 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=
cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/comail/colog v0.0.0-20160416085026-fba8e7b1f46c h1:bzYQ6WpR+t35/y19HUkolcg7SYeWZ15IclC9Z4naGHI=
github.com/comail/colog v0.0.0-20160416085026-fba8e7b1f46c/go.mod h1:1WwgAwMKQLYG5I2FBhpVx94YTOAuB2W59IZ7REjSE6Y=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/emicklei/go-restful/v3 v3.8.0 h1:eCZ8ulSerjdAiaNpF7GxXIE7ZCMo1moN1qX+S609eVw=
github.com/emicklei/go-restful/v3 v3.8.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.19.5 h1:1WJP/wi4OjB4iV8KVbH73rQaoialJrqv8gitZLxGLtM=
github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg=
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng=
github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/gnostic v0.5.7-v3refs h1:FhTMOKj2VhjpouxvWJAV1TL304uMlb9zcDqkl6cEI54=
github.com/google/gnostic v0.5.7-v3refs/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g=
github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U=
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/onsi/ginkgo/v2 v2.1.6 h1:Fx2POJZfKRQcM1pH49qSZiYeu319wji004qX+GDovrU=
github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI=
go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
go.uber.org/zap v1.23.0 h1:OjGQ5KQDEUawVHxNwQgPpiypGHOxo2mNZsOqTak4fFY=
go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.1.1-0.20221027164007-c63010009c80 h1:CtRWmqbiPSOXwJV1JoY7pWiTx2xzVKQ813bvU+Y/9jI=
golang.org/x/net v0.1.1-0.20221027164007-c63010009c80/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 h1:RerP+noqYHUQ8CMRcPlC2nvTa4dcBIjegkuWdcUDuqg=
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44=
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=
google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
k8s.io/api v0.0.0-20221108053748-98c1aa6b3d0a h1:GaCla9HtNyi63kysI/cyeA4bv6wRkIyuiUeXpaTF+dw=
k8s.io/api v0.0.0-20221108053748-98c1aa6b3d0a/go.mod h1:PSXY9/fSNyKgKHUU+O9scnZiW8m+V1znqk49oI6hAEY=
k8s.io/apimachinery v0.25.4 h1:CtXsuaitMESSu339tfhVXhQrPET+EiWnIY1rcurKnAc=
k8s.io/apimachinery v0.25.4/go.mod h1:jaF9C/iPNM1FuLl7Zuy5b9v+n35HGSh6AQ4HYRkCqwo=
k8s.io/client-go v0.25.4 h1:3RNRDffAkNU56M/a7gUfXaEzdhZlYhoW8dgViGy5fn8=
k8s.io/client-go v0.25.4/go.mod h1:8trHCAC83XKY0wsBIpbirZU4NTUpbuhc2JnI7OruGZw=
k8s.io/klog/v2 v2.80.1 h1:atnLQ121W371wYYFawwYx1aEY2eUfs4l3J72wtgAwV4=
k8s.io/klog/v2 v2.80.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0=
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 h1:MQ8BAZPZlWk3S9K4a9NCkIFQtZShWqoha7snGixVgEA=
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1/go.mod h1:C/N6wCaBHeBHkHUesQOQy2/MZqGgMAFPqGsGQLdbZBU=
k8s.io/kube-scheduler v0.25.4 h1:/AVGKBw8akKDdlYeJ8ki87Fkje517Kq7hxpdsqyOXOM=
k8s.io/kube-scheduler v0.25.4/go.mod h1:o3NLBkBhJMI7QjERmWwi/uLE0+YijV1xxWmDmWeZTvE=
k8s.io/utils v0.0.0-20221107191617-1a15be271d1d h1:0Smp/HP1OH4Rvhe+4B8nWGERtlqAGSftbSbbmm45oFs=
k8s.io/utils v0.0.0-20221107191617-1a15be271d1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 h1:iXTIw73aPyC+oRdyqqvVJuloN1p0AC/kzH07hu3NE+k=
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE=
sigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=

View File

@@ -0,0 +1,177 @@
package cache
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"sync"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
corelisters "k8s.io/client-go/listers/core/v1"
)
type SchedulerCache struct {
// a map from node name to NodeInfo.
nodes map[string]*NodeInfo
// nodeLister can list/get nodes from the shared informer's store.
nodeLister corelisters.NodeLister
//
podLister corelisters.PodLister
// knownPods records pods carrying the ALIYUN_GPU_ID annotation; an entry is added when the annotation is set and removed when the pod completes or is deleted
knownPods map[types.UID]*v1.Pod
nLock *sync.RWMutex
}
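// NewSchedulerCache builds an empty SchedulerCache backed by the given node and pod listers.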
func NewSchedulerCache(nLister corelisters.NodeLister, pLister corelisters.PodLister) *SchedulerCache {
return &SchedulerCache{
nodes: make(map[string]*NodeInfo),
nodeLister: nLister,
podLister: pLister,
knownPods: make(map[types.UID]*v1.Pod),
nLock: new(sync.RWMutex),
}
}
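// GetNodeinfos returns a snapshot slice of all NodeInfo entries currently in the cache.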
func (cache *SchedulerCache) GetNodeinfos() []*NodeInfo {
nodes := []*NodeInfo{}
for _, n := range cache.nodes {
nodes = append(nodes, n)
}
return nodes
}
// BuildCache populates the cache during initialization with GPU sharing pods that are already assigned to nodes
func (cache *SchedulerCache) BuildCache() error {
log.V(5).Info("debug: begin to build scheduler cache")
pods, err := cache.podLister.List(labels.Everything())
if err != nil {
return err
} else {
for _, pod := range pods {
if utils.GetGPUMemoryFromPodAnnotation(pod) <= uint(0) {
continue
}
if len(pod.Spec.NodeName) == 0 {
continue
}
err = cache.AddOrUpdatePod(pod)
if err != nil {
return err
}
}
}
return nil
}
func (cache *SchedulerCache) GetPod(name, namespace string) (*v1.Pod, error) {
return cache.podLister.Pods(namespace).Get(name)
}
// KnownPod reports whether the pod with the given UID is already tracked in the cache
func (cache *SchedulerCache) KnownPod(podUID types.UID) bool {
cache.nLock.RLock()
defer cache.nLock.RUnlock()
_, found := cache.knownPods[podUID]
return found
}
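// AddOrUpdatePod records the pod on its assigned node's device map and remembers it by UID;
// pods that are not yet bound to a node are skipped.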
func (cache *SchedulerCache) AddOrUpdatePod(pod *v1.Pod) error {
log.V(100).Info("debug: Add or update pod info: %v", pod)
log.V(100).Info("debug: Node %v", cache.nodes)
if len(pod.Spec.NodeName) == 0 {
log.V(100).Info("debug: pod %s in ns %s is not assigned to any node, skip", pod.Name, pod.Namespace)
return nil
}
n, err := cache.GetNodeInfo(pod.Spec.NodeName)
if err != nil {
return err
}
podCopy := pod.DeepCopy()
if n.addOrUpdatePod(podCopy) {
// put it into known pod
cache.rememberPod(pod.UID, podCopy)
} else {
log.V(100).Info("debug: pod %s in ns %s's gpu id is %d, it's illegal, skip",
pod.Name,
pod.Namespace,
utils.GetGPUIDFromAnnotation(pod))
}
return nil
}
// The lock is in cacheNode
func (cache *SchedulerCache) RemovePod(pod *v1.Pod) {
log.V(100).Info("debug: Remove pod info: %v", pod)
log.V(100).Info("debug: Node %v", cache.nodes)
n, err := cache.GetNodeInfo(pod.Spec.NodeName)
if err == nil {
n.removePod(pod)
} else {
log.V(10).Info("debug: Failed to get node %s due to %v", pod.Spec.NodeName, err)
}
cache.forgetPod(pod.UID)
}
// Get or build nodeInfo if it doesn't exist
func (cache *SchedulerCache) GetNodeInfo(name string) (*NodeInfo, error) {
node, err := cache.nodeLister.Get(name)
if err != nil {
return nil, err
}
cache.nLock.Lock()
defer cache.nLock.Unlock()
n, ok := cache.nodes[name]
if !ok {
n = NewNodeInfo(node)
cache.nodes[name] = n
} else {
// if the existing node turns from non gpushare to gpushare
// if (utils.GetTotalGPUMemory(n.node) <= 0 && utils.GetTotalGPUMemory(node) > 0) ||
// (utils.GetGPUCountInNode(n.node) <= 0 && utils.GetGPUCountInNode(node) > 0) ||
// // if the existing node turns from gpushare to non gpushare
// (utils.GetTotalGPUMemory(n.node) > 0 && utils.GetTotalGPUMemory(node) <= 0) ||
// (utils.GetGPUCountInNode(n.node) > 0 && utils.GetGPUCountInNode(node) <= 0) {
if len(cache.nodes[name].devs) == 0 ||
utils.GetTotalGPUMemory(n.node) <= 0 ||
utils.GetGPUCountInNode(n.node) <= 0 {
log.V(10).Info("info: GetNodeInfo() need update node %s",
name)
// fix the scenario where the number of devices changes from 0 to a positive number
cache.nodes[name].Reset(node)
log.V(10).Info("info: node: %s, labels from cache after been updated: %v", n.node.Name, n.node.Labels)
} else {
log.V(10).Info("info: GetNodeInfo() uses the existing nodeInfo for %s", name)
}
log.V(100).Info("debug: node %s with devices %v", name, n.devs)
}
return n, nil
}
func (cache *SchedulerCache) forgetPod(uid types.UID) {
cache.nLock.Lock()
defer cache.nLock.Unlock()
delete(cache.knownPods, uid)
}
func (cache *SchedulerCache) rememberPod(uid types.UID, pod *v1.Pod) {
cache.nLock.Lock()
defer cache.nLock.Unlock()
cache.knownPods[pod.UID] = pod
}

View File

@@ -0,0 +1,33 @@
package cache
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
apierrors "k8s.io/apimachinery/pkg/api/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
corelisters "k8s.io/client-go/listers/core/v1"
clientgocache "k8s.io/client-go/tools/cache"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
var (
ConfigMapLister corelisters.ConfigMapLister
ConfigMapInformerSynced clientgocache.InformerSynced
)
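// getConfigMap looks up a ConfigMap by name in the kube-system namespace through the shared lister.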
func getConfigMap(name string) *v1.ConfigMap {
configMap, err := ConfigMapLister.ConfigMaps(metav1.NamespaceSystem).Get(name)
// If we can't get the configmap just return nil. The resync will eventually
// sync things up.
if err != nil {
if !apierrors.IsNotFound(err) {
log.V(10).Info("warn: find configmap with error: %v", err)
utilruntime.HandleError(err)
}
return nil
}
return configMap
}

View File

@@ -0,0 +1,80 @@
package cache
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"sync"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
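// DeviceInfo tracks a single GPU device on a node: its index, its total GPU memory,
// and the pods currently assigned to it.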
type DeviceInfo struct {
idx int
podMap map[types.UID]*v1.Pod
// usedGPUMem uint
totalGPUMem uint
rwmu *sync.RWMutex
}
func (d *DeviceInfo) GetPods() []*v1.Pod {
pods := []*v1.Pod{}
for _, pod := range d.podMap {
pods = append(pods, pod)
}
return pods
}
func newDeviceInfo(index int, totalGPUMem uint) *DeviceInfo {
return &DeviceInfo{
idx: index,
totalGPUMem: totalGPUMem,
podMap: map[types.UID]*v1.Pod{},
rwmu: new(sync.RWMutex),
}
}
func (d *DeviceInfo) GetTotalGPUMemory() uint {
return d.totalGPUMem
}
func (d *DeviceInfo) GetUsedGPUMemory() (gpuMem uint) {
log.V(100).Info("debug: GetUsedGPUMemory() podMap %v, and its address is %p", d.podMap, d)
d.rwmu.RLock()
defer d.rwmu.RUnlock()
for _, pod := range d.podMap {
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
log.V(100).Info("debug: skip the pod %s in ns %s due to its status is %s", pod.Name, pod.Namespace, pod.Status.Phase)
continue
}
// gpuMem += utils.GetGPUMemoryFromPodEnv(pod)
gpuMem += utils.GetGPUMemoryFromPodAnnotation(pod)
}
return gpuMem
}
func (d *DeviceInfo) addPod(pod *v1.Pod) {
log.V(100).Info("debug: dev.addPod() Pod %s in ns %s with the GPU ID %d will be added to device map",
pod.Name,
pod.Namespace,
d.idx)
d.rwmu.Lock()
defer d.rwmu.Unlock()
d.podMap[pod.UID] = pod
log.V(100).Info("debug: dev.addPod() after updated is %v, and its address is %p",
d.podMap,
d)
}
func (d *DeviceInfo) removePod(pod *v1.Pod) {
log.V(100).Info("debug: dev.removePod() Pod %s in ns %s with the GPU ID %d will be removed from device map",
pod.Name,
pod.Namespace,
d.idx)
d.rwmu.Lock()
defer d.rwmu.Unlock()
delete(d.podMap, pod.UID)
log.V(100).Info("debug: dev.removePod() after updated is %v, and its address is %p",
d.podMap,
d)
}

View File

@@ -0,0 +1,362 @@
package cache
import (
"context"
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"strconv"
"strings"
"sync"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)
const (
OptimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
)
// NodeInfo is node level aggregated information.
type NodeInfo struct {
ctx context.Context
name string
node *v1.Node
devs map[int]*DeviceInfo
gpuCount int
gpuTotalMemory int
rwmu *sync.RWMutex
}
// NewNodeInfo creates the node level aggregated view, with one DeviceInfo per GPU sized as total GPU memory divided by GPU count
func NewNodeInfo(node *v1.Node) *NodeInfo {
log.V(10).Info("debug: NewNodeInfo() creates nodeInfo for %s", node.Name)
devMap := map[int]*DeviceInfo{}
for i := 0; i < utils.GetGPUCountInNode(node); i++ {
devMap[i] = newDeviceInfo(i, uint(utils.GetTotalGPUMemory(node)/utils.GetGPUCountInNode(node)))
}
if len(devMap) == 0 {
log.V(3).Info("warn: node %s with nodeinfo %v has no devices", node.Name, node)
}
return &NodeInfo{
ctx: context.Background(),
name: node.Name,
node: node,
devs: devMap,
gpuCount: utils.GetGPUCountInNode(node),
gpuTotalMemory: utils.GetTotalGPUMemory(node),
rwmu: new(sync.RWMutex),
}
}
// Only update the devices when the length of devs is 0
func (n *NodeInfo) Reset(node *v1.Node) {
n.gpuCount = utils.GetGPUCountInNode(node)
n.gpuTotalMemory = utils.GetTotalGPUMemory(node)
n.node = node
if n.gpuCount == 0 {
log.V(3).Info("warn: Reset for node %s but the gpu count is 0", node.Name)
}
if n.gpuTotalMemory == 0 {
log.V(3).Info("warn: Reset for node %s but the gpu total memory is 0", node.Name)
}
if len(n.devs) == 0 && n.gpuCount > 0 {
devMap := map[int]*DeviceInfo{}
for i := 0; i < utils.GetGPUCountInNode(node); i++ {
devMap[i] = newDeviceInfo(i, uint(n.gpuTotalMemory/n.gpuCount))
}
n.devs = devMap
}
log.V(3).Info("info: Reset() update nodeInfo for %s with devs %v", node.Name, n.devs)
}
func (n *NodeInfo) GetName() string {
return n.name
}
func (n *NodeInfo) GetDevs() []*DeviceInfo {
devs := make([]*DeviceInfo, n.gpuCount)
for i, dev := range n.devs {
devs[i] = dev
}
return devs
}
func (n *NodeInfo) GetNode() *v1.Node {
return n.node
}
func (n *NodeInfo) GetTotalGPUMemory() int {
return n.gpuTotalMemory
}
func (n *NodeInfo) GetGPUCount() int {
return n.gpuCount
}
func (n *NodeInfo) removePod(pod *v1.Pod) {
n.rwmu.Lock()
defer n.rwmu.Unlock()
id := utils.GetGPUIDFromAnnotation(pod)
if id >= 0 {
dev, found := n.devs[id]
if !found {
log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
} else {
dev.removePod(pod)
}
} else {
log.V(3).Info("warn: Pod %s in ns %s is not set the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
}
}
// Add the Pod which has the GPU id to the node
func (n *NodeInfo) addOrUpdatePod(pod *v1.Pod) (added bool) {
n.rwmu.Lock()
defer n.rwmu.Unlock()
id := utils.GetGPUIDFromAnnotation(pod)
log.V(3).Info("debug: addOrUpdatePod() Pod %s in ns %s with the GPU ID %d should be added to device map",
pod.Name,
pod.Namespace,
id)
if id >= 0 {
dev, found := n.devs[id]
if !found {
log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
} else {
dev.addPod(pod)
added = true
}
} else {
log.V(3).Info("warn: Pod %s in ns %s is not set the GPU ID %d in node %s", pod.Name, pod.Namespace, id, n.name)
}
return added
}
// check if the pod can be allocated on the node
func (n *NodeInfo) Assume(pod *v1.Pod) (allocatable bool) {
allocatable = false
n.rwmu.RLock()
defer n.rwmu.RUnlock()
availableGPUs := n.getAvailableGPUs()
reqGPU := uint(utils.GetGPUMemoryFromPodResource(pod))
log.V(10).Info("debug: AvailableGPUs: %v in node %s", availableGPUs, n.name)
if len(availableGPUs) > 0 {
for devID := 0; devID < len(n.devs); devID++ {
availableGPU, ok := availableGPUs[devID]
if ok {
if availableGPU >= reqGPU {
allocatable = true
break
}
}
}
}
return allocatable
}
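// Allocate picks a GPU device with enough free memory for the pod, patches the pod's GPU ID
// annotation, binds the pod to this node, and finally records the pod on the chosen device.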
func (n *NodeInfo) Allocate(clientset *kubernetes.Clientset, pod *v1.Pod) (err error) {
var newPod *v1.Pod
n.rwmu.Lock()
defer n.rwmu.Unlock()
log.V(3).Info("info: Allocate() ----Begin to allocate GPU for gpu mem for pod %s in ns %s----", pod.Name, pod.Namespace)
// 1. Update the pod spec
devId, found := n.allocateGPUID(pod)
if found {
log.V(3).Info("info: Allocate() 1. Allocate GPU ID %d to pod %s in ns %s.----", devId, pod.Name, pod.Namespace)
// newPod := utils.GetUpdatedPodEnvSpec(pod, devId, nodeInfo.GetTotalGPUMemory()/nodeInfo.GetGPUCount())
//newPod = utils.GetUpdatedPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
patchedAnnotationBytes, err := utils.PatchPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
if err != nil {
return fmt.Errorf("failed to generate patched annotations,reason: %v", err)
}
newPod, err = clientset.CoreV1().Pods(pod.Namespace).Patch(n.ctx, pod.Name, types.StrategicMergePatchType, patchedAnnotationBytes, metav1.PatchOptions{})
//_, err = clientset.CoreV1().Pods(newPod.Namespace).Update(newPod)
if err != nil {
// the object has been modified; please apply your changes to the latest version and try again
if err.Error() == OptimisticLockErrorMsg {
// retry
pod, err = clientset.CoreV1().Pods(pod.Namespace).Get(n.ctx, pod.Name, metav1.GetOptions{})
if err != nil {
return err
}
// newPod = utils.GetUpdatedPodEnvSpec(pod, devId, nodeInfo.GetTotalGPUMemory()/nodeInfo.GetGPUCount())
//newPod = utils.GetUpdatedPodAnnotationSpec(pod, devId, n.GetTotalGPUMemory()/n.GetGPUCount())
//_, err = clientset.CoreV1().Pods(newPod.Namespace).Update(newPod)
newPod, err = clientset.CoreV1().Pods(pod.Namespace).Patch(n.ctx, pod.Name, types.StrategicMergePatchType, patchedAnnotationBytes, metav1.PatchOptions{})
if err != nil {
return err
}
} else {
log.V(3).Info("failed to patch pod %v", pod)
return err
}
}
} else {
err = fmt.Errorf("The node %s can't place the pod %s in ns %s,and the pod spec is %v", pod.Spec.NodeName, pod.Name, pod.Namespace, pod)
}
// 2. Bind the pod to the node
if err == nil {
binding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: pod.Name, UID: pod.UID},
Target: v1.ObjectReference{Kind: "Node", Name: n.name},
}
log.V(3).Info("info: Allocate() 2. Try to bind pod %s in %s namespace to node %s with %v",
pod.Name,
pod.Namespace,
pod.Spec.NodeName,
binding)
err = clientset.CoreV1().Pods(pod.Namespace).Bind(n.ctx, binding, metav1.CreateOptions{})
if err != nil {
log.V(3).Info("warn: Failed to bind the pod %s in ns %s due to %v", pod.Name, pod.Namespace, err)
return err
}
}
// 3. update the device info if the pod is update successfully
if err == nil {
log.V(3).Info("info: Allocate() 3. Try to add pod %s in ns %s to dev %d",
pod.Name,
pod.Namespace,
devId)
dev, found := n.devs[devId]
if !found {
log.V(3).Info("warn: Pod %s in ns %s failed to find the GPU ID %d in node %s", pod.Name, pod.Namespace, devId, n.name)
} else {
dev.addPod(newPod)
}
}
log.V(3).Info("info: Allocate() ----End to allocate GPU for gpu mem for pod %s in ns %s----", pod.Name, pod.Namespace)
return err
}
// allocate the GPU ID to the pod
func (n *NodeInfo) allocateGPUID(pod *v1.Pod) (candidateDevID int, found bool) {
reqGPU := uint(0)
found = false
candidateDevID = -1
candidateGPUMemory := uint(0)
availableGPUs := n.getAvailableGPUs()
reqGPU = uint(utils.GetGPUMemoryFromPodResource(pod))
if reqGPU > uint(0) {
log.V(3).Info("info: reqGPU for pod %s in ns %s: %d", pod.Name, pod.Namespace, reqGPU)
log.V(3).Info("info: AvailableGPUs: %v in node %s", availableGPUs, n.name)
if len(availableGPUs) > 0 {
for devID := 0; devID < len(n.devs); devID++ {
availableGPU, ok := availableGPUs[devID]
if ok {
if availableGPU >= reqGPU {
if candidateDevID == -1 || candidateGPUMemory > availableGPU {
candidateDevID = devID
candidateGPUMemory = availableGPU
}
found = true
}
}
}
}
if found {
log.V(3).Info("info: Find candidate dev id %d for pod %s in ns %s successfully.",
candidateDevID,
pod.Name,
pod.Namespace)
} else {
log.V(3).Info("warn: Failed to find available GPUs %d for the pod %s in the namespace %s",
reqGPU,
pod.Name,
pod.Namespace)
}
}
return candidateDevID, found
}
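// getAvailableGPUs returns the free GPU memory per device index (total minus used),
// with unhealthy devices removed from the result.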
func (n *NodeInfo) getAvailableGPUs() (availableGPUs map[int]uint) {
allGPUs := n.getAllGPUs()
usedGPUs := n.getUsedGPUs()
unhealthyGPUs := n.getUnhealthyGPUs()
availableGPUs = map[int]uint{}
for id, totalGPUMem := range allGPUs {
if usedGPUMem, found := usedGPUs[id]; found {
availableGPUs[id] = totalGPUMem - usedGPUMem
}
}
log.V(3).Info("info: available GPU list %v before removing unhealty GPUs", availableGPUs)
for id, _ := range unhealthyGPUs {
log.V(3).Info("info: delete dev %d from availble GPU list", id)
delete(availableGPUs, id)
}
log.V(3).Info("info: available GPU list %v after removing unhealty GPUs", availableGPUs)
return availableGPUs
}
// device index: gpu memory
func (n *NodeInfo) getUsedGPUs() (usedGPUs map[int]uint) {
usedGPUs = map[int]uint{}
for _, dev := range n.devs {
usedGPUs[dev.idx] = dev.GetUsedGPUMemory()
}
log.V(3).Info("info: getUsedGPUs: %v in node %s, and devs %v", usedGPUs, n.name, n.devs)
return usedGPUs
}
// device index: gpu memory
func (n *NodeInfo) getAllGPUs() (allGPUs map[int]uint) {
allGPUs = map[int]uint{}
for _, dev := range n.devs {
allGPUs[dev.idx] = dev.totalGPUMem
}
log.V(3).Info("info: getAllGPUs: %v in node %s, and dev %v", allGPUs, n.name, n.devs)
return allGPUs
}
// getUnhealthyGPUs get the unhealthy GPUs from configmap
func (n *NodeInfo) getUnhealthyGPUs() (unhealthyGPUs map[int]bool) {
unhealthyGPUs = map[int]bool{}
name := fmt.Sprintf("unhealthy-gpu-%s", n.GetName())
log.V(3).Info("info: try to find unhealthy node %s", name)
cm := getConfigMap(name)
if cm == nil {
return
}
if devicesStr, found := cm.Data["gpus"]; found {
log.V(3).Info("warn: the unhelathy gpus %s", devicesStr)
idsStr := strings.Split(devicesStr, ",")
for _, sid := range idsStr {
id, err := strconv.Atoi(sid)
if err != nil {
log.V(3).Info("warn: failed to parse id %s due to %v", sid, err)
}
unhealthyGPUs[id] = true
}
} else {
log.V(3).Info("info: skip, because there are no unhealthy gpus")
}
return
}

View File

@@ -0,0 +1,346 @@
package gpushare
import (
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"golang.org/x/time/rate"
"time"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
clientgocache "k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/record"
)
var (
KeyFunc = clientgocache.DeletionHandlingMetaNamespaceKeyFunc
)
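// Controller watches pods, nodes and configmaps through shared informers and keeps the
// scheduler cache in sync via a rate limited work queue.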
type Controller struct {
clientset *kubernetes.Clientset
// podLister can list/get pods from the shared informer's store.
podLister corelisters.PodLister
// nodeLister can list/get nodes from the shared informer's store.
nodeLister corelisters.NodeLister
// podQueue is a rate limited work queue. This is used to queue work to be
// processed instead of performing it as soon as a change happens. This
// means we can ensure we only process a fixed amount of resources at a
// time, and makes it easy to ensure we are never processing the same item
// simultaneously in two different workers.
podQueue workqueue.RateLimitingInterface
// recorder is an event recorder for recording Event resources to the
// Kubernetes API.
recorder record.EventRecorder
// podInformerSynced returns true if the pod store has been synced at least once.
podInformerSynced clientgocache.InformerSynced
// nodeInformerSynced returns true if the node store has been synced at least once.
nodeInformerSynced clientgocache.InformerSynced
schedulerCache *cache.SchedulerCache
// The cache to store the pod to be removed
removePodCache map[string]*v1.Pod
}
func NewController(clientset *kubernetes.Clientset, kubeInformerFactory kubeinformers.SharedInformerFactory, stopCh <-chan struct{}) (*Controller, error) {
log.V(100).Info("info: Creating event broadcaster")
eventBroadcaster := record.NewBroadcaster()
// eventBroadcaster.StartLogging(log.Infof)
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientset.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "gpushare-schd-extender"})
rateLimiter := workqueue.NewMaxOfRateLimiter(
workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second),
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(100), 500)},
)
c := &Controller{
clientset: clientset,
podQueue: workqueue.NewNamedRateLimitingQueue(rateLimiter, "podQueue"),
recorder: recorder,
removePodCache: map[string]*v1.Pod{},
}
// Create pod informer.
podInformer := kubeInformerFactory.Core().V1().Pods()
podInformer.Informer().AddEventHandler(clientgocache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
// log.V(100).Info("debug: added pod %s in ns %s", t.Name, t.Namespace)
return utils.IsGPUsharingPod(t)
case clientgocache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok {
log.V(100).Info("debug: delete pod %s in ns %s", pod.Name, pod.Namespace)
return utils.IsGPUsharingPod(pod)
}
runtime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, c))
return false
default:
runtime.HandleError(fmt.Errorf("unable to handle object in %T: %T", c, obj))
return false
}
},
Handler: clientgocache.ResourceEventHandlerFuncs{
AddFunc: c.addPodToCache,
UpdateFunc: c.updatePodInCache,
DeleteFunc: c.deletePodFromCache,
},
})
c.podLister = podInformer.Lister()
c.podInformerSynced = podInformer.Informer().HasSynced
// Create node informer
nodeInformer := kubeInformerFactory.Core().V1().Nodes()
c.nodeLister = nodeInformer.Lister()
c.nodeInformerSynced = nodeInformer.Informer().HasSynced
// Create configMap informer
cmInformer := kubeInformerFactory.Core().V1().ConfigMaps()
cache.ConfigMapLister = cmInformer.Lister()
cache.ConfigMapInformerSynced = cmInformer.Informer().HasSynced
// Start informer goroutines.
go kubeInformerFactory.Start(stopCh)
// Create scheduler Cache
c.schedulerCache = cache.NewSchedulerCache(c.nodeLister, c.podLister)
log.V(100).Info("info: begin to wait for cache")
if ok := clientgocache.WaitForCacheSync(stopCh, c.nodeInformerSynced); !ok {
return nil, fmt.Errorf("failed to wait for node caches to sync")
} else {
log.V(100).Info("info: init the node cache successfully")
}
if ok := clientgocache.WaitForCacheSync(stopCh, c.podInformerSynced); !ok {
return nil, fmt.Errorf("failed to wait for pod caches to sync")
} else {
log.V(100).Info("info: init the pod cache successfully")
}
if ok := clientgocache.WaitForCacheSync(stopCh, cache.ConfigMapInformerSynced); !ok {
return nil, fmt.Errorf("failed to wait for configmap caches to sync")
} else {
log.V(100).Info("info: init the configmap cache successfully")
}
log.V(100).Info("info: end to wait for cache")
return c, nil
}
func (c *Controller) BuildCache() error {
return c.schedulerCache.BuildCache()
}
func (c *Controller) GetSchedulerCache() *cache.SchedulerCache {
return c.schedulerCache
}
// Run will set up the event handlers
func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) error {
defer runtime.HandleCrash()
defer c.podQueue.ShutDown()
log.V(9).Info("info: Starting GPU Sharing Controller.")
log.V(9).Info("info: Waiting for informer caches to sync")
log.V(9).Info("info: Starting %v workers.", threadiness)
for i := 0; i < threadiness; i++ {
go wait.Until(c.runWorker, time.Second, stopCh)
}
log.V(3).Info("info: Started workers")
<-stopCh
log.V(3).Info("info: Shutting down workers")
return nil
}
// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// workqueue.
func (c *Controller) runWorker() {
for c.processNextWorkItem() {
}
}
// syncPod reconciles the scheduler cache with the current state of the pod identified by key:
// deleted or completed pods are removed from the cache, other GPU sharing pods are added or updated.
// This function is not meant to be invoked concurrently with the same key.
func (c *Controller) syncPod(key string) (forget bool, err error) {
ns, name, err := clientgocache.SplitMetaNamespaceKey(key)
log.V(9).Info("debug: begin to sync gpushare pod %s in ns %s", name, ns)
if err != nil {
return false, err
}
pod, err := c.podLister.Pods(ns).Get(name)
switch {
case errors.IsNotFound(err):
log.V(10).Info("debug: pod %s in ns %s has been deleted.", name, ns)
pod, found := c.removePodCache[key]
if found {
c.schedulerCache.RemovePod(pod)
delete(c.removePodCache, key)
}
case err != nil:
log.V(10).Info("warn: unable to retrieve pod %v from the store: %v", key, err)
default:
if utils.IsCompletePod(pod) {
log.V(10).Info("debug: pod %s in ns %s has completed.", name, ns)
c.schedulerCache.RemovePod(pod)
} else {
err := c.schedulerCache.AddOrUpdatePod(pod)
if err != nil {
return false, err
}
}
}
return true, nil
}
// processNextWorkItem will read a single work item off the podQueue and
// attempt to process it.
func (c *Controller) processNextWorkItem() bool {
log.V(100).Info("debug: begin processNextWorkItem()")
key, quit := c.podQueue.Get()
if quit {
return false
}
defer c.podQueue.Done(key)
defer log.V(100).Info("debug: end processNextWorkItem()")
forget, err := c.syncPod(key.(string))
if err == nil {
if forget {
c.podQueue.Forget(key)
}
return true
}
log.V(3).Info("Error syncing pods: %v", err)
runtime.HandleError(fmt.Errorf("Error syncing pod: %v", err))
c.podQueue.AddRateLimited(key)
return true
}
func (c *Controller) addPodToCache(obj interface{}) {
pod, ok := obj.(*v1.Pod)
if !ok {
log.V(3).Info("warn: cannot convert to *v1.Pod: %v", obj)
return
}
// if !assignedNonTerminatedPod(t) {
// log.V(100).Info("debug: skip pod %s due to it's terminated.", pod.Name)
// return
// }
podKey, err := KeyFunc(pod)
if err != nil {
log.V(3).Info("warn: Failed to get the jobkey: %v", err)
return
}
c.podQueue.Add(podKey)
// NOTE: Updating equivalence cache of addPodToCache has been
// handled optimistically in: pkg/scheduler/scheduler.go#assume()
}
func (c *Controller) updatePodInCache(oldObj, newObj interface{}) {
oldPod, ok := oldObj.(*v1.Pod)
if !ok {
log.V(3).Info("warn: cannot convert oldObj to *v1.Pod: %v", oldObj)
return
}
newPod, ok := newObj.(*v1.Pod)
if !ok {
log.V(3).Info("warn: cannot convert newObj to *v1.Pod: %v", newObj)
return
}
needUpdate := false
podUID := oldPod.UID
// 1. Need update when pod is turned to complete or failed
if c.schedulerCache.KnownPod(podUID) && utils.IsCompletePod(newPod) {
needUpdate = true
}
// 2. Need update when it's unknown pod, and GPU annotation has been set
if !c.schedulerCache.KnownPod(podUID) && utils.GetGPUIDFromAnnotation(newPod) >= 0 {
needUpdate = true
}
if needUpdate {
podKey, err := KeyFunc(newPod)
if err != nil {
log.V(3).Info("warn: Failed to get the jobkey: %v", err)
return
}
log.V(3).Info("info: Need to update pod name %s in ns %s and old status is %v, new status is %v; its old annotation %v and new annotation %v",
newPod.Name,
newPod.Namespace,
oldPod.Status.Phase,
newPod.Status.Phase,
oldPod.Annotations,
newPod.Annotations)
c.podQueue.Add(podKey)
} else {
log.V(100).Info("debug: No need to update pod name %s in ns %s and old status is %v, new status is %v; its old annotation %v and new annotation %v",
newPod.Name,
newPod.Namespace,
oldPod.Status.Phase,
newPod.Status.Phase,
oldPod.Annotations,
newPod.Annotations)
}
return
}
func (c *Controller) deletePodFromCache(obj interface{}) {
var pod *v1.Pod
switch t := obj.(type) {
case *v1.Pod:
pod = t
case clientgocache.DeletedFinalStateUnknown:
var ok bool
pod, ok = t.Obj.(*v1.Pod)
if !ok {
log.V(3).Info("warn: cannot convert to *v1.Pod: %v", t.Obj)
return
}
default:
log.V(3).Info("warn: cannot convert to *v1.Pod: %v", t)
return
}
log.V(100).Info("debug: delete pod %s in ns %s", pod.Name, pod.Namespace)
podKey, err := KeyFunc(pod)
if err != nil {
log.V(3).Info("warn: Failed to get the jobkey: %v", err)
return
}
c.podQueue.Add(podKey)
c.removePodCache[podKey] = pod
}

View File

@@ -0,0 +1,70 @@
package log
import (
"fmt"
"os"
"sync"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
type levelLogger struct {
level *int32
mu sync.Mutex
log *zap.Logger
}
type verbose bool
var l *levelLogger
func NewLoggerWithLevel(level int32, option ...zap.Option) {
cfg := zap.NewProductionEncoderConfig()
cfg.EncodeTime = zapcore.ISO8601TimeEncoder
core := zapcore.NewCore(
zapcore.NewJSONEncoder(cfg),
zapcore.Lock(os.Stdout),
zap.NewAtomicLevel(),
)
if option == nil {
option = []zap.Option{}
}
option = append(option, zap.AddCaller(), zap.AddCallerSkip(1))
l = &levelLogger{
level: &level,
mu: sync.Mutex{},
log: zap.New(core, option...),
}
}
/*
V selects log messages by verbosity level.
The global logger defaults to level 3; use a higher level such as 10 for debugging.
Suggested levels:
api request 4
api response 9
service funcs 5
db error 9
db query 11
db result 15
*/
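// Example usage from a caller (assuming a hypothetical NewLoggerWithLevel(10) at startup,
// with podName and cache standing in for the caller's own variables):
//
//	log.V(3).Info("info: handling pod %s", podName)  // emitted, because 3 < 10
//	log.V(100).Info("debug: cache dump %v", cache)   // suppressed, because 100 >= 10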
func V(level int32) verbose {
return level < *l.level
}
func (v verbose) Info(format string, args ...interface{}) {
if v {
l.log.Info(fmt.Sprintf(format, args...))
}
}
func Fatal(format string, args ...interface{}) {
l.log.Fatal(fmt.Sprintf(format, args...))
}

View File

@@ -0,0 +1,64 @@
package routes
import (
"net/http"
"net/http/pprof"
"github.com/julienschmidt/httprouter"
)
func AddPProf(r *httprouter.Router) {
r.GET("/debug/pprof/", index)
r.GET("/debug/pprof/cmdline/", cmdline)
r.GET("/debug/pprof/profile/", profile)
r.GET("/debug/pprof/symbol/", symbol)
r.GET("/debug/pprof/trace/", trace)
r.GET("/debug/pprof/heap/", heap)
r.GET("/debug/pprof/goroutine/", goroutine)
r.GET("/debug/pprof/block/", block)
r.GET("/debug/pprof/threadcreate/", threadcreate)
r.GET("/debug/pprof/mutex/", mutex)
}
// profiling tools handlers
func index(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Index(w, r)
}
func cmdline(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Cmdline(w, r)
}
func profile(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Profile(w, r)
}
func symbol(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Symbol(w, r)
}
func trace(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Trace(w, r)
}
func heap(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Handler("heap").ServeHTTP(w, r)
}
func goroutine(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Handler("goroutine").ServeHTTP(w, r)
}
func block(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Handler("block").ServeHTTP(w, r)
}
func threadcreate(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Handler("threadcreate").ServeHTTP(w, r)
}
func mutex(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
pprof.Handler("mutex").ServeHTTP(w, r)
}

View File

@@ -0,0 +1,181 @@
package routes
import (
"bytes"
"encoding/json"
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"io"
"net/http"
"time"
"github.com/julienschmidt/httprouter"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/scheduler"
schedulerapi "k8s.io/kube-scheduler/extender/v1"
)
const (
versionPath = "/version"
apiPrefix = "/gpushare-scheduler"
bindPrefix = apiPrefix + "/bind"
predicatesPrefix = apiPrefix + "/filter"
inspectPrefix = apiPrefix + "/inspect/:nodename"
inspectListPrefix = apiPrefix + "/inspect"
)
var (
version = "0.1.0"
// mu sync.RWMutex
)
func checkBody(w http.ResponseWriter, r *http.Request) {
if r.Body == nil {
http.Error(w, "Please send a request body", 400)
return
}
}
func InspectRoute(inspect *scheduler.Inspect) httprouter.Handle {
return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
result := inspect.Handler(ps.ByName("nodename"))
if resultBody, err := json.Marshal(result); err != nil {
// panic(err)
log.V(3).Info("warn: Failed due to %v", err)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusInternalServerError)
errMsg := fmt.Sprintf("{'error':'%s'}", err.Error())
w.Write([]byte(errMsg))
} else {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write(resultBody)
}
}
}
func PredicateRoute(predicate *scheduler.Predicate) httprouter.Handle {
return func(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
checkBody(w, r)
// mu.RLock()
// defer mu.RUnlock()
var buf bytes.Buffer
body := io.TeeReader(r.Body, &buf)
var extenderArgs schedulerapi.ExtenderArgs
var extenderFilterResult *schedulerapi.ExtenderFilterResult
if err := json.NewDecoder(body).Decode(&extenderArgs); err != nil {
log.V(3).Info("warn: failed to parse request due to error %v", err)
extenderFilterResult = &schedulerapi.ExtenderFilterResult{
Nodes: nil,
FailedNodes: nil,
Error: err.Error(),
}
} else {
log.V(90).Info("debug: gpusharingfilter ExtenderArgs =%v", extenderArgs)
extenderFilterResult = predicate.Handler(&extenderArgs)
}
if resultBody, err := json.Marshal(extenderFilterResult); err != nil {
// panic(err)
log.V(3).Info("warn: Failed due to %v", err)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusInternalServerError)
errMsg := fmt.Sprintf("{'error':'%s'}", err.Error())
w.Write([]byte(errMsg))
} else {
log.V(100).Info("predicate: %s, extenderFilterResult = %s ", predicate.Name, resultBody)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write(resultBody)
}
}
}
func BindRoute(bind *scheduler.Bind) httprouter.Handle {
return func(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
checkBody(w, r)
// mu.Lock()
// defer mu.Unlock()
var buf bytes.Buffer
body := io.TeeReader(r.Body, &buf)
var extenderBindingArgs schedulerapi.ExtenderBindingArgs
var extenderBindingResult *schedulerapi.ExtenderBindingResult
failed := false
if err := json.NewDecoder(body).Decode(&extenderBindingArgs); err != nil {
extenderBindingResult = &schedulerapi.ExtenderBindingResult{
Error: err.Error(),
}
failed = true
} else {
log.V(10).Info("debug: gpusharingBind ExtenderArgs =%v", extenderBindingArgs)
extenderBindingResult = bind.Handler(extenderBindingArgs)
}
if len(extenderBindingResult.Error) > 0 {
failed = true
}
if resultBody, err := json.Marshal(extenderBindingResult); err != nil {
log.V(3).Info("warn: Failed due to %v", err)
// panic(err)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusInternalServerError)
errMsg := fmt.Sprintf("{'error':'%s'}", err.Error())
w.Write([]byte(errMsg))
} else {
log.V(3).Info("info: extenderBindingResult = %s", resultBody)
w.Header().Set("Content-Type", "application/json")
if failed {
w.WriteHeader(http.StatusInternalServerError)
} else {
w.WriteHeader(http.StatusOK)
}
w.Write(resultBody)
}
}
}
func VersionRoute(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
fmt.Fprint(w, version)
}
func AddVersion(router *httprouter.Router) {
router.GET(versionPath, DebugLogging(VersionRoute, versionPath))
}
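// DebugLogging wraps a handler so that the request body, the response writer state and the
// elapsed time are logged at a high verbosity level.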
func DebugLogging(h httprouter.Handle, path string) httprouter.Handle {
return func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
log.V(90).Info("path: %s, request body = %s", path, r.Body)
startTime := time.Now()
h(w, r, p)
log.V(90).Info("path: %s, response: %v, cost_time: %v", path, w, time.Now().Sub(startTime))
}
}
func AddPredicate(router *httprouter.Router, predicate *scheduler.Predicate) {
// path := predicatesPrefix + "/" + predicate.Name
router.POST(predicatesPrefix, DebugLogging(PredicateRoute(predicate), predicatesPrefix))
}
func AddBind(router *httprouter.Router, bind *scheduler.Bind) {
if handle, _, _ := router.Lookup("POST", bindPrefix); handle != nil {
log.V(3).Info("warning: AddBind was called more then once!")
} else {
router.POST(bindPrefix, DebugLogging(BindRoute(bind), bindPrefix))
}
}
func AddInspect(router *httprouter.Router, inspect *scheduler.Inspect) {
router.GET(inspectPrefix, DebugLogging(InspectRoute(inspect), inspectPrefix))
router.GET(inspectListPrefix, DebugLogging(InspectRoute(inspect), inspectListPrefix))
}

View File

@@ -0,0 +1,26 @@
package scheduler
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"k8s.io/apimachinery/pkg/types"
schedulerapi "k8s.io/kube-scheduler/extender/v1"
)
// Bind is responsible for binding node and pod
type Bind struct {
Name string
Func func(podName string, podNamespace string, podUID types.UID, node string, cache *cache.SchedulerCache) error
cache *cache.SchedulerCache
}
// Handler handles the Bind request
func (b Bind) Handler(args schedulerapi.ExtenderBindingArgs) *schedulerapi.ExtenderBindingResult {
err := b.Func(args.PodName, args.PodNamespace, args.PodUID, args.Node, b.cache)
errMsg := ""
if err != nil {
errMsg = err.Error()
}
return &schedulerapi.ExtenderBindingResult{
Error: errMsg,
}
}

View File

@@ -0,0 +1,71 @@
package scheduler
import (
"context"
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
)
const (
OptimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
)
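// NewGPUShareBind returns the bind extender: it resolves the pod, looks up the target node
// in the scheduler cache and delegates GPU allocation and binding to NodeInfo.Allocate.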
func NewGPUShareBind(ctx context.Context, clientset *kubernetes.Clientset, c *cache.SchedulerCache) *Bind {
return &Bind{
Name: "gpusharingbinding",
Func: func(name string, namespace string, podUID types.UID, node string, c *cache.SchedulerCache) error {
pod, err := getPod(ctx, name, namespace, podUID, clientset, c)
if err != nil {
log.V(9).Info("warn: Failed to handle pod %s in ns %s due to error %v", name, namespace, err)
return err
}
nodeInfo, err := c.GetNodeInfo(node)
if err != nil {
log.V(9).Info("warn: Failed to handle pod %s in ns %s due to error %v", name, namespace, err)
return err
}
err = nodeInfo.Allocate(clientset, pod)
if err != nil {
log.V(9).Info("warn: Failed to handle pod %s in ns %s due to error %v", name, namespace, err)
return err
}
return nil
},
cache: c,
}
}
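// getPod reads the pod from the scheduler cache, falling back to the API server when it is
// not cached, and re-reads it from the API server if the cached UID does not match the expected one.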
func getPod(ctx context.Context, name string, namespace string, podUID types.UID, clientset *kubernetes.Clientset, c *cache.SchedulerCache) (pod *v1.Pod, err error) {
pod, err = c.GetPod(name, namespace)
if errors.IsNotFound(err) {
pod, err = clientset.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
return nil, err
}
} else if err != nil {
return nil, err
}
if pod.UID != podUID {
pod, err = clientset.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
return nil, err
}
if pod.UID != podUID {
return nil, fmt.Errorf("The pod %s in ns %s's uid is %v, and it's not equal with expected %v",
name,
namespace,
pod.UID,
podUID)
}
}
return pod, nil
}

View File

@@ -0,0 +1,42 @@
package scheduler
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
)
func NewGPUShareInspect(c *cache.SchedulerCache) *Inspect {
return &Inspect{
Name: "gpushareinspect",
cache: c,
}
}
type Result struct {
Nodes []*Node `json:"nodes"`
Error string `json:"error,omitempty"`
}
type Node struct {
Name string `json:"name"`
TotalGPU uint `json:"totalGPU"`
UsedGPU uint `json:"usedGPU"`
Devices []*Device `json:"devs"`
}
type Device struct {
ID int `json:"id"`
TotalGPU uint `json:"totalGPU"`
UsedGPU uint `json:"usedGPU"`
Pods []*Pod `json:"pods"`
}
type Pod struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
UsedGPU int `json:"usedGPU"`
}
type Inspect struct {
Name string
cache *cache.SchedulerCache
}
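
To make the wire format of these types concrete, a serialized Result for a single shared-GPU node might look like the sample below. JSON carries no comments, so note that the node name, pod name and all memory figures are made-up illustrative values; the units are whatever the cluster reports for the gpu-mem resource.

{
  "nodes": [
    {
      "name": "node-a",
      "totalGPU": 16276,
      "usedGPU": 2048,
      "devs": [
        {
          "id": 0,
          "totalGPU": 16276,
          "usedGPU": 2048,
          "pods": [
            { "name": "binpack-1-abcde", "namespace": "default", "usedGPU": 2048 }
          ]
        }
      ]
    }
  ]
}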

View File

@@ -0,0 +1,10 @@
package scheduler
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"k8s.io/client-go/kubernetes"
)
func NewGPUsharePredicate(clientset *kubernetes.Clientset, c *cache.SchedulerCache) *Predicate {
return &Predicate{Name: "gpusharingfilter", cache: c}
}

View File

@@ -0,0 +1,69 @@
package scheduler
import (
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
)
func (in Inspect) Handler(name string) *Result {
nodes := []*Node{}
errMsg := ""
if len(name) == 0 {
nodeInfos := in.cache.GetNodeinfos()
for _, info := range nodeInfos {
nodes = append(nodes, buildNode(info))
}
} else {
node, err := in.cache.GetNodeInfo(name)
if err != nil {
errMsg = err.Error()
} else {
nodes = append(nodes, buildNode(node))
}
}
return &Result{
Nodes: nodes,
Error: errMsg,
}
}
func buildNode(info *cache.NodeInfo) *Node {
devInfos := info.GetDevs()
devs := []*Device{}
var usedGPU uint
for i, devInfo := range devInfos {
dev := &Device{
ID: i,
TotalGPU: devInfo.GetTotalGPUMemory(),
UsedGPU: devInfo.GetUsedGPUMemory(),
}
podInfos := devInfo.GetPods()
pods := []*Pod{}
for _, podInfo := range podInfos {
if utils.AssignedNonTerminatedPod(podInfo) {
pod := &Pod{
Namespace: podInfo.Namespace,
Name: podInfo.Name,
UsedGPU: utils.GetGPUMemoryFromPodResource(podInfo),
}
pods = append(pods, pod)
}
}
dev.Pods = pods
devs = append(devs, dev)
usedGPU += devInfo.GetUsedGPUMemory()
}
return &Node{
Name: info.GetName(),
TotalGPU: uint(info.GetTotalGPUMemory()),
UsedGPU: usedGPU,
Devices: devs,
}
}

View File

@@ -0,0 +1,87 @@
package scheduler
import (
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/cache"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/utils"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kube-scheduler/extender/v1"
)
type Predicate struct {
Name string
cache *cache.SchedulerCache
}
func (p Predicate) checkNode(pod *v1.Pod, nodeName string, c *cache.SchedulerCache) (*v1.Node, error) {
log.V(10).Info("info: check if the pod name %s can be scheduled on node %s", pod.Name, nodeName)
nodeInfo, err := c.GetNodeInfo(nodeName)
if err != nil {
return nil, err
}
node := nodeInfo.GetNode()
if node == nil {
return nil, fmt.Errorf("failed get node with name %s", nodeName)
}
if !utils.IsGPUSharingNode(node) {
return nil, fmt.Errorf("The node %s is not for GPU share, need skip", nodeName)
}
allocatable := nodeInfo.Assume(pod)
if !allocatable {
return nil, fmt.Errorf("Insufficient GPU Memory in one device")
} else {
log.V(10).Info("info: The pod %s in the namespace %s can be scheduled on %s",
pod.Name,
pod.Namespace,
nodeName)
}
return node, nil
}
func (p Predicate) Handler(args *schedulerapi.ExtenderArgs) *schedulerapi.ExtenderFilterResult {
if args == nil || args.Pod == nil {
return &schedulerapi.ExtenderFilterResult{Error: fmt.Sprintf("arg or pod is nil")}
}
pod := args.Pod
var nodeNames []string
if args.NodeNames != nil {
nodeNames = *args.NodeNames
log.V(3).Info("extender args NodeNames is not nil, result %+v", nodeNames)
} else if args.Nodes != nil {
for _, n := range args.Nodes.Items {
nodeNames = append(nodeNames, n.Name)
}
log.V(3).Info("extender args Nodes is not nil, names is %+v", nodeNames)
} else {
return &schedulerapi.ExtenderFilterResult{Error: fmt.Sprintf("cannot get node names")}
}
canSchedule := make([]string, 0, len(nodeNames))
canNotSchedule := make(map[string]string)
canScheduleNodes := &v1.NodeList{}
for _, nodeName := range nodeNames {
node, err := p.checkNode(pod, nodeName, p.cache)
if err != nil {
canNotSchedule[nodeName] = err.Error()
} else {
if node != nil {
canSchedule = append(canSchedule, nodeName)
canScheduleNodes.Items = append(canScheduleNodes.Items, *node)
}
}
}
result := schedulerapi.ExtenderFilterResult{
NodeNames: &canSchedule,
Nodes: canScheduleNodes,
FailedNodes: canNotSchedule,
Error: "",
}
log.V(100).Info("predicate result for %s, is %+v", pod.Name, result)
return &result
}
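
These filter and bind handlers are what kube-scheduler calls once the extender is registered in its scheduler policy. A minimal sketch of such a policy is shown below; the URL, port, verb paths and managed resource name are assumptions for illustration and must match the routes and resource name the deployment actually uses (the utils constants in this commit define rainbond.com/gpu-mem, while the demo manifests request aliyun.com/gpu-mem).

{
  "kind": "Policy",
  "apiVersion": "v1",
  "extenders": [
    {
      "urlPrefix": "http://127.0.0.1:32766/gpushare-scheduler",
      "filterVerb": "filter",
      "bindVerb": "bind",
      "enableHttps": false,
      "nodeCacheCapable": true,
      "managedResources": [
        { "name": "rainbond.com/gpu-mem", "ignoredByScheduler": false }
      ],
      "ignorable": false
    }
  ]
}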

View File

@@ -0,0 +1,13 @@
package utils
const (
ResourceName = "rainbond.com/gpu-mem"
CountName = "rainbond.com/gpu-count"
EnvNVGPU = "NVIDIA_VISIBLE_DEVICES"
EnvResourceIndex = "ALIYUN_COM_GPU_MEM_IDX"
EnvResourceByPod = "ALIYUN_COM_GPU_MEM_POD"
EnvResourceByDev = "ALIYUN_COM_GPU_MEM_DEV"
EnvAssignedFlag = "ALIYUN_COM_GPU_MEM_ASSIGNED"
EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
)

View File

@@ -0,0 +1,30 @@
package utils
import "k8s.io/api/core/v1"
// IsGPUSharingNode reports whether the node exposes the GPU sharing resource
func IsGPUSharingNode(node *v1.Node) bool {
return GetTotalGPUMemory(node) > 0
}
// GetTotalGPUMemory returns the total GPU memory reported in the node's capacity
func GetTotalGPUMemory(node *v1.Node) int {
val, ok := node.Status.Capacity[ResourceName]
if !ok {
return 0
}
return int(val.Value())
}
// GetGPUCountInNode returns the number of GPUs reported in the node's capacity
func GetGPUCountInNode(node *v1.Node) int {
val, ok := node.Status.Capacity[CountName]
if !ok {
return 0
}
return int(val.Value())
}

View File

@@ -0,0 +1,219 @@
package utils
import (
"encoding/json"
"fmt"
"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/log"
v1 "k8s.io/api/core/v1"
"strconv"
"time"
)
// AssignedNonTerminatedPod selects pods that are assigned and non-terminal (scheduled and running).
func AssignedNonTerminatedPod(pod *v1.Pod) bool {
if pod.DeletionTimestamp != nil {
return false
}
if len(pod.Spec.NodeName) == 0 {
return false
}
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
return false
}
return true
}
// IsCompletePod determines if the pod is complete
func IsCompletePod(pod *v1.Pod) bool {
if pod.DeletionTimestamp != nil {
return true
}
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
return true
}
return false
}
// IsGPUsharingPod determines if it's the pod for GPU sharing
func IsGPUsharingPod(pod *v1.Pod) bool {
return GetGPUMemoryFromPodResource(pod) > 0
}
// GetGPUIDFromAnnotation gets GPU ID from Annotation
func GetGPUIDFromAnnotation(pod *v1.Pod) int {
id := -1
if len(pod.ObjectMeta.Annotations) > 0 {
value, found := pod.ObjectMeta.Annotations[EnvResourceIndex]
if found {
var err error
id, err = strconv.Atoi(value)
if err != nil {
log.V(9).Info("warn: Failed due to %v for pod %s in ns %s", err, pod.Name, pod.Namespace)
id = -1
}
}
}
return id
}
// GetGPUIDFromEnv gets GPU ID from Env
func GetGPUIDFromEnv(pod *v1.Pod) int {
id := -1
for _, container := range pod.Spec.Containers {
id = getGPUIDFromContainer(container)
if id >= 0 {
return id
}
}
return id
}
func getGPUIDFromContainer(container v1.Container) (devIdx int) {
devIdx = -1
var err error
loop:
for _, env := range container.Env {
if env.Name == EnvResourceIndex {
devIdx, err = strconv.Atoi(env.Value)
if err != nil {
log.V(9).Info("warn: Failed due to %v for %s", err, container.Name)
devIdx = -1
}
break loop
}
}
return devIdx
}
// GetGPUMemoryFromPodAnnotation gets the GPU memory assigned to the pod from its annotation
func GetGPUMemoryFromPodAnnotation(pod *v1.Pod) (gpuMemory uint) {
if len(pod.ObjectMeta.Annotations) > 0 {
value, found := pod.ObjectMeta.Annotations[EnvResourceByPod]
if found {
s, _ := strconv.Atoi(value)
if s < 0 {
s = 0
}
gpuMemory += uint(s)
}
}
log.V(100).Info("debug: pod %s in ns %s with status %v has GPU Mem %d",
pod.Name,
pod.Namespace,
pod.Status.Phase,
gpuMemory)
return gpuMemory
}
// GetGPUMemoryFromPodEnv gets the GPU memory assigned to the pod by summing its containers' env values
func GetGPUMemoryFromPodEnv(pod *v1.Pod) (gpuMemory uint) {
for _, container := range pod.Spec.Containers {
gpuMemory += getGPUMemoryFromContainerEnv(container)
}
log.V(100).Info("debug: pod %s in ns %s with status %v has GPU Mem %d",
pod.Name,
pod.Namespace,
pod.Status.Phase,
gpuMemory)
return gpuMemory
}
func getGPUMemoryFromContainerEnv(container v1.Container) (gpuMemory uint) {
gpuMemory = 0
loop:
for _, env := range container.Env {
if env.Name == EnvResourceByPod {
s, _ := strconv.Atoi(env.Value)
if s < 0 {
s = 0
}
gpuMemory = uint(s)
break loop
}
}
return gpuMemory
}
// GetGPUMemoryFromPodResource gets GPU Memory of the Pod
func GetGPUMemoryFromPodResource(pod *v1.Pod) int {
var total int
containers := pod.Spec.Containers
for _, container := range containers {
if val, ok := container.Resources.Limits[ResourceName]; ok {
total += int(val.Value())
}
}
return total
}
// GetGPUMemoryFromContainerResource gets the GPU memory requested by the container
func GetGPUMemoryFromContainerResource(container v1.Container) int {
var total int
if val, ok := container.Resources.Limits[ResourceName]; ok {
total += int(val.Value())
}
return total
}
// GetUpdatedPodEnvSpec updates pod env with devId
func GetUpdatedPodEnvSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) (newPod *v1.Pod) {
newPod = oldPod.DeepCopy()
for i, c := range newPod.Spec.Containers {
gpuMem := GetGPUMemoryFromContainerResource(c)
if gpuMem > 0 {
envs := []v1.EnvVar{
// v1.EnvVar{Name: EnvNVGPU, Value: fmt.Sprintf("%d", devId)},
v1.EnvVar{Name: EnvResourceIndex, Value: fmt.Sprintf("%d", devId)},
v1.EnvVar{Name: EnvResourceByPod, Value: fmt.Sprintf("%d", gpuMem)},
v1.EnvVar{Name: EnvResourceByDev, Value: fmt.Sprintf("%d", totalGPUMemByDev)},
v1.EnvVar{Name: EnvAssignedFlag, Value: "false"},
}
for _, env := range envs {
newPod.Spec.Containers[i].Env = append(newPod.Spec.Containers[i].Env,
env)
}
}
}
return newPod
}
// GetUpdatedPodAnnotationSpec updates the pod annotations with the device assignment
func GetUpdatedPodAnnotationSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) (newPod *v1.Pod) {
newPod = oldPod.DeepCopy()
if len(newPod.ObjectMeta.Annotations) == 0 {
newPod.ObjectMeta.Annotations = map[string]string{}
}
now := time.Now()
newPod.ObjectMeta.Annotations[EnvResourceIndex] = fmt.Sprintf("%d", devId)
newPod.ObjectMeta.Annotations[EnvResourceByDev] = fmt.Sprintf("%d", totalGPUMemByDev)
newPod.ObjectMeta.Annotations[EnvResourceByPod] = fmt.Sprintf("%d", GetGPUMemoryFromPodResource(newPod))
newPod.ObjectMeta.Annotations[EnvAssignedFlag] = "false"
newPod.ObjectMeta.Annotations[EnvResourceAssumeTime] = fmt.Sprintf("%d", now.UnixNano())
return newPod
}
func PatchPodAnnotationSpec(oldPod *v1.Pod, devId int, totalGPUMemByDev int) ([]byte, error) {
now := time.Now()
patchAnnotations := map[string]interface{}{
"metadata": map[string]map[string]string{"annotations": {
EnvResourceIndex: fmt.Sprintf("%d", devId),
EnvResourceByDev: fmt.Sprintf("%d", totalGPUMemByDev),
EnvResourceByPod: fmt.Sprintf("%d", GetGPUMemoryFromPodResource(oldPod)),
EnvAssignedFlag: "false",
EnvResourceAssumeTime: fmt.Sprintf("%d", now.UnixNano()),
}}}
return json.Marshal(patchAnnotations)
}
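
PatchPodAnnotationSpec only builds the strategic-merge-patch payload; applying it is up to the caller. A minimal sketch of doing so with client-go follows; the helper name and its extra imports ("context", "k8s.io/apimachinery/pkg/types", metav1, "k8s.io/client-go/kubernetes") are assumptions for illustration, not code from this package.

// applyGPUAnnotationPatch is a hypothetical helper: it applies the annotation
// patch built by PatchPodAnnotationSpec to the live pod object.
func applyGPUAnnotationPatch(ctx context.Context, clientset *kubernetes.Clientset, pod *v1.Pod, devId int, totalGPUMemByDev int) error {
	patch, err := PatchPodAnnotationSpec(pod, devId, totalGPUMemByDev)
	if err != nil {
		return err
	}
	// A strategic merge patch only touches the annotations present in the payload.
	_, err = clientset.CoreV1().Pods(pod.Namespace).Patch(ctx, pod.Name,
		types.StrategicMergePatchType, patch, metav1.PatchOptions{})
	return err
}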

View File

@@ -0,0 +1,30 @@
package signals
import (
"os"
"os/signal"
"syscall"
)
var onlyOneSignalHandler = make(chan struct{})
var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}
// SetupSignalHandler registers handlers for SIGTERM and SIGINT. A stop channel is
// returned which is closed on the first of these signals. If a second signal is
// caught, the program is terminated with exit code 1.
func SetupSignalHandler() (stopCh <-chan struct{}) {
close(onlyOneSignalHandler) // panics when called twice
stop := make(chan struct{})
c := make(chan os.Signal, 2)
signal.Notify(c, shutdownSignals...)
go func() {
<-c
close(stop)
<-c
os.Exit(1) // second signal. Exit directly.
}()
return stop
}
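
For reference, a minimal sketch of how the returned stop channel is typically consumed by the extender's main function; the run helper and the signals import path are assumptions for illustration.

package main

import (
	"github.com/AliyunContainerService/gpushare-scheduler-extender/pkg/signals"
)

// run stands in for whatever long-running work the binary performs
// (informers, the HTTP endpoints, cache sync) and honors stopCh for shutdown.
func run(stopCh <-chan struct{}) {
	<-stopCh
}

func main() {
	stopCh := signals.SetupSignalHandler() // closed on SIGINT or SIGTERM
	run(stopCh)                            // blocks until a shutdown signal arrives
}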

View File

@@ -0,0 +1,29 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: binpack-1
labels:
app: binpack-1
spec:
replicas: 1
selector: # define how the deployment finds the pods it manages
matchLabels:
app: binpack-1
template: # define the pod specification
metadata:
labels:
app: binpack-1
spec:
containers:
- name: binpack-1
image: cheyang/gpu-player:v2
resources:
limits:
# GiB
aliyun.com/gpu-mem: 2

View File

@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: binpack-2
labels:
app: binpack-2
spec:
replicas: 1
selector: # define how the deployment finds the pods it manages
matchLabels:
app: binpack-2
template: # define the pod specification
metadata:
labels:
app: binpack-2
spec:
containers:
- name: binpack-2
image: cheyang/gpu-player:v2
resources:
limits:
aliyun.com/gpu-mem: 2

View File

@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: binpack-3
labels:
app: binpack-3
spec:
replicas: 1
selector: # define how the deployment finds the pods it manages
matchLabels:
app: binpack-3
template: # define the pod specification
metadata:
labels:
app: binpack-3
spec:
containers:
- name: binpack-3
image: cheyang/gpu-player:v2
resources:
limits:
aliyun.com/gpu-mem: 2

View File

@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: binpack-4
labels:
app: binpack-4
spec:
replicas: 1
selector: # define how the deployment finds the pods it manages
matchLabels:
app: binpack-4
template: # define the pod specification
metadata:
labels:
app: binpack-4
spec:
containers:
- name: binpack-4
image: cheyang/gpu-player:v2
resources:
limits:
aliyun.com/gpu-mem: 16276

View File

@@ -0,0 +1,9 @@
FROM cheyang/gpu-player
COPY main.py /app/main.py
COPY run.sh /app/run.sh
RUN chmod u+x /app/run.sh
CMD ["/app/run.sh"]

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import tensorflow as tf
FLAGS = None
def train(fraction=1.0):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
# Create a session that only claims the given fraction of GPU memory.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = fraction
sess = tf.Session(config=config)
# Runs the op.
while True:
sess.run(c)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--total', type=float, default=1000,
help='Total GPU memory.')
parser.add_argument('--allocated', type=float, default=1000,
help='Allocated GPU memory.')
FLAGS, unparsed = parser.parse_known_args()
# fraction = FLAGS.allocated / FLAGS.total * 0.85
fraction = round(FLAGS.allocated * 0.7 / FLAGS.total, 1)
print(fraction)
train(fraction)

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
echo ALIYUN_COM_GPU_MEM_DEV=$ALIYUN_COM_GPU_MEM_DEV
echo ALIYUN_COM_GPU_MEM_CONTAINER=$ALIYUN_COM_GPU_MEM_CONTAINER
python /app/main.py --total=$ALIYUN_COM_GPU_MEM_DEV --allocated=$ALIYUN_COM_GPU_MEM_CONTAINER

View File

@@ -0,0 +1,12 @@
FROM golang:1.19-alpine as build
WORKDIR /go/src/github.com/AliyunContainerService/gpushare-scheduler-extender
COPY . .
RUN go build -o /go/bin/gpushare-sche-extender cmd/*.go
FROM alpine
COPY --from=build /go/bin/gpushare-sche-extender /usr/bin/gpushare-sche-extender
CMD ["gpushare-sche-extender"]

View File

@@ -0,0 +1,5 @@
FROM alpine
COPY ./bin/gpushare-sche-extender /usr/bin/gpushare-sche-extender
CMD ["gpushare-sche-extender"]