synchronization

This commit is contained in:
2025-08-25 16:04:00 +08:00
commit 33f9b3ce46
1951 changed files with 854396 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
# Image that carries the dp-evict helper scripts.
FROM debian:bullseye-slim

# Use the Aliyun Debian mirror (amd64 only). The entry is quoted so the
# shell never treats "[arch=amd64]" as a glob pattern.
RUN echo \
    "deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib" \
    > /etc/apt/sources.list

# Refresh the index and install in ONE layer: a cached "apt-get update"
# layer must never be reused with a changed package list. The apt cache and
# index lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y curl tzdata iproute2 bash && \
    rm -rf /var/cache/apt/* /var/lib/apt/lists/* && \
    cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    echo "Asia/Shanghai" > /etc/timezone && \
    mkdir -p /dp-evict

# COPY is enough for a plain local directory (no archive/URL handling needed).
COPY dp-evict /dp-evict
RUN chmod -R +x /dp-evict

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Build and push the nvidia-device-plugin-evict image.
# The build context is the directory containing this script and the tag is
# the current time at minute resolution. The pushed image reference is
# printed as the last line of output.
set -xe

BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
# Single definition of the image reference used by build, push and echo.
IMAGE="registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-evict:${TIMESTAMP}"

# Quoted so a checkout path containing whitespace still works.
cd "$BASEDIR"
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build --network=host -t "$IMAGE" .
docker push "$IMAGE"
echo "$IMAGE"

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Moves nvidia-device-plugin.yml out of /etc/kubernetes/manifests.
# Abort on the first failing command and trace every command.
set -ex

# Directory that receives the removed manifest.
backup_dir=/etc/kubernetes/manifests-backup
#######################################
# Print a message prefixed with the current local time.
# Arguments: $1 - message text
# Outputs:   "[YYYYmmdd HH:MM:SS]: <message>" on stdout
#######################################
public::common::log() {
  # The message is quoted so that runs of whitespace survive and characters
  # such as "*" are not expanded against the current directory.
  printf '%s %s\n' "$(date +'[%Y%m%d %H:%M:%S]:')" "${1-}"
}
#######################################
# Move /etc/kubernetes/manifests/nvidia-device-plugin.yml into
# /etc/kubernetes/manifests-backup/, creating that directory if needed.
# When the manifest is absent, only a log line is written.
# Globals:   none written (paths are function-local)
# Outputs:   a log line via public::common::log when there is nothing to move
#######################################
public::evict::gpu-device-plugin() {
  # Locals: the function must not leak an unused "dir" or overwrite the
  # script-level backup_dir.
  local manifest=/etc/kubernetes/manifests/nvidia-device-plugin.yml
  local target_dir=/etc/kubernetes/manifests-backup/

  if [[ -f "$manifest" ]]; then
    mkdir -p "$target_dir"
    mv "$manifest" "$target_dir"
  else
    public::common::log "Skip removing nvidia-device-plugin.yml, because it doesn't exist."
  fi
}
# Entry point: run the eviction step, create the /ready marker file, then
# keep the process alive by looping on one-hour sleeps for as long as
# sleep succeeds.
main() {
  public::evict::gpu-device-plugin
  touch /ready

  while sleep 3600; do
    :
  done
}

main "$@"

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# When /k8s-host exists: replace <host>/usr/local/dp-evict with a fresh copy
# of /dp-evict, make it executable, and run dp-evict-on-host.sh chrooted into
# /k8s-host, forwarding this script's arguments. Afterwards loop on one-hour
# sleeps. When /k8s-host does not exist nothing is done.
set -ex

host_root=/k8s-host
install_dir=/usr/local/dp-evict

if [[ -d "$host_root" ]]; then
  rm -rf "${host_root}${install_dir}"
  mkdir -p "${host_root}${install_dir}"
  cp -r /dp-evict/* "${host_root}${install_dir}"
  chmod -R +x "${host_root}${install_dir}"
  # Inside the chroot the install directory is addressed without the prefix.
  chroot "$host_root" "${install_dir}/dp-evict-on-host.sh" "$@"
  while sleep 3600; do
    :
  done
fi

View File

@@ -0,0 +1,12 @@
# Image that carries the helper scripts shipped under /dp-evict.
FROM debian:bullseye-slim

# Switch apt to the Aliyun Debian mirror (amd64 only). Quoting keeps the
# shell from interpreting "[arch=amd64]" as a glob pattern.
RUN echo \
    "deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib" \
    > /etc/apt/sources.list

# "apt-get update" and "apt-get install" share one layer so a stale cached
# index is never combined with a changed install list; the apt cache and
# index lists are deleted in that same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y curl tzdata iproute2 bash && \
    rm -rf /var/cache/apt/* /var/lib/apt/lists/* && \
    cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    echo "Asia/Shanghai" > /etc/timezone && \
    mkdir -p /dp-evict

# A plain local directory copy needs COPY, not ADD.
COPY dp-evict /dp-evict
RUN chmod -R +x /dp-evict

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Build and push the nvidia-device-plugin-recover image.
# Uses this script's directory as the build context and a minute-resolution
# timestamp as the tag; prints the pushed image reference when done.
set -xe

BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
# One definition of the image reference shared by build, push and echo.
IMAGE="registry.cn-hangzhou.aliyuncs.com/acs/nvidia-device-plugin-recover:${TIMESTAMP}"

# Quoted: the script may live under a path that contains whitespace.
cd "$BASEDIR"
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build --network=host -t "$IMAGE" .
docker push "$IMAGE"
echo "$IMAGE"

View File

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
# Restores nvidia-device-plugin.yml into the manifests directory from the
# backup directory. Abort on the first failing command; trace every command.
set -ex

# Directory the manifest is restored into.
dir=/etc/kubernetes/manifests
# Directory the manifest is restored from.
backup_dir=/etc/kubernetes/manifests-backup
#######################################
# Write one timestamped log line.
# Arguments: $1 - message text
# Outputs:   "[YYYYmmdd HH:MM:SS]: <message>" on stdout
#######################################
public::common::log() {
  # Quoting the message preserves its whitespace exactly and prevents glob
  # characters in it from being expanded to file names.
  printf '%s %s\n' "$(date +'[%Y%m%d %H:%M:%S]:')" "${1-}"
}
#######################################
# Copy nvidia-device-plugin.yml from $backup_dir back into $dir.
# Nothing is copied when the manifest already exists in $dir or when no
# backup copy exists; each outcome is logged. The backup copy is left in place.
# Globals:   dir, backup_dir (read)
# Outputs:   one log line via public::common::log
#######################################
public::recover::gpu-device-plugin() {
  local name=nvidia-device-plugin.yml

  # All path expansions are quoted so directories containing whitespace work.
  if [[ -f "$dir/$name" ]]; then
    public::common::log "Skip recovering nvidia-device-plugin.yml, because it already exist."
  elif [[ -f "$backup_dir/$name" ]]; then
    cp -f "$backup_dir/$name" "$dir/$name"
    public::common::log "Finish recovering nvidia-device-plugin.yml."
  else
    public::common::log "No nvidia-device-plugin.yml to recover."
  fi
}
# Entry point: run the recovery step, create the /ready marker file, then
# stay alive by looping on one-hour sleeps for as long as sleep succeeds.
main() {
  public::recover::gpu-device-plugin
  touch /ready

  while sleep 3600; do
    :
  done
}

main "$@"

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# When /k8s-host exists: replace <host>/usr/local/dp-evict with a fresh copy
# of /dp-evict, make it executable, and run dp-recover-on-host.sh chrooted
# into /k8s-host, forwarding this script's arguments. Afterwards loop on
# one-hour sleeps. When /k8s-host does not exist nothing is done.
set -ex

host_root=/k8s-host
install_dir=/usr/local/dp-evict

if [[ -d "$host_root" ]]; then
  rm -rf "${host_root}${install_dir}"
  mkdir -p "${host_root}${install_dir}"
  cp -r /dp-evict/* "${host_root}${install_dir}"
  chmod -R +x "${host_root}${install_dir}"
  # Inside the chroot the install directory is addressed without the prefix.
  chroot "$host_root" "${install_dir}/dp-recover-on-host.sh" "$@"
  while sleep 3600; do
    :
  done
fi

View File

@@ -0,0 +1,14 @@
# Image that carries the scheduler-extender installer files.
FROM debian:bullseye-slim

# Replace the apt sources with the Aliyun Debian mirror (amd64 only). The
# entry is quoted so "[arch=amd64]" can never be glob-expanded by the shell.
RUN echo \
    "deb [arch=amd64] http://mirrors.aliyun.com/debian/ bullseye main non-free contrib" \
    > /etc/apt/sources.list

# Index refresh and install are a single layer (no stale cached index), and
# the apt cache plus index lists are removed before the layer is committed.
RUN apt-get update && \
    apt-get install -y curl tzdata iproute2 bash && \
    rm -rf /var/cache/apt/* /var/lib/apt/lists/* && \
    cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    echo "Asia/Shanghai" > /etc/timezone && \
    mkdir -p /schd-extender

# COPY suffices for a local directory; ADD's extra behaviors are not needed.
COPY schd-extender /schd-extender
RUN chmod -R +x /schd-extender

View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Build the schd-extender-deployer image from this script's directory, tag it
# with a minute-resolution timestamp, add a second tag under cheyang/, and
# push the registry.cn-hangzhou.aliyuncs.com reference (the cheyang/ tag is
# created locally only; this script does not push it).
set -xe

BASEDIR=$(dirname "$0")
echo "build docker images in $BASEDIR"
TIMESTAMP=$(date +%Y%m%d%H%M)
IMAGE="registry.cn-hangzhou.aliyuncs.com/acs/schd-extender-deployer:${TIMESTAMP}"

# Quoted so a directory name containing whitespace does not split.
cd "$BASEDIR"
# docker build --no-cache -t $IMAGE -f $FILE $BASEDIR
docker build -t "$IMAGE" .
docker tag "$IMAGE" "cheyang/schd-extender-deployer:${TIMESTAMP}"
docker push "$IMAGE"

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env bash
# Configures kube-scheduler.yaml to use the scheduler policy config file.
# Abort on the first failing command; trace every command.
set -ex

# Directory holding kube-scheduler.yaml.
dir=/etc/kubernetes/manifests
# Directory that receives backups of the files this script rewrites.
backup_dir=/etc/kubernetes/manifests-backup
# Second-resolution stamp used in backup file names and the revision annotation.
TIMESTAMP=$(date +%Y%m%d%H%M%S)
#######################################
# Emit a single log line stamped with the current local time.
# Arguments: $1 - message text
# Outputs:   "[YYYYmmdd HH:MM:SS]: <message>" on stdout
#######################################
public::common::log() {
  # Unquoted, the message would be word-split (collapsing whitespace) and
  # glob-expanded; quoting prints it verbatim.
  printf '%s %s\n' "$(date +'[%Y%m%d %H:%M:%S]:')" "${1-}"
}
#######################################
# Copy a file into $backup_dir. The first backup is stored as
# "<stem>.ori.<ext>"; once that exists, later backups are stored as
# "<stem>-<TIMESTAMP>.<ext>".
# Globals:   backup_dir, TIMESTAMP (read)
# Arguments: $1 - file to back up, $2 - backup name stem, $3 - extension
# Outputs:   one "Backup <path>" log line
#######################################
public::deployer::_backup-file() {
  local src=$1 stem=$2 ext=$3
  local dest="$backup_dir/$stem.ori.$ext"

  if [[ -f "$dest" ]]; then
    dest="$backup_dir/$stem-$TIMESTAMP.$ext"
  fi
  cp "$src" "$dest"
  public::common::log "Backup $dest"
}

#######################################
# Install the scheduler policy config and wire it into kube-scheduler.yaml.
# Steps: back up kube-scheduler.yaml and any existing policy file; copy
# /schd-extender/scheduler-policy-config.json into /etc/kubernetes and replace
# 127.0.0.1 in it with $NODE_IP; set the deployment.kubernetes.io/revision
# annotation to $TIMESTAMP; add the --policy-config-file flag, the
# volumeMount and the volume when they are not already present.
# Globals:   dir, backup_dir, TIMESTAMP, NODE_IP (read)
# Returns:   1 without touching any file when NODE_IP is unset or empty
#######################################
public::deployer::sche-policy-config() {
  local scheduler_manifest="$dir/kube-scheduler.yaml"
  local policy_file=/etc/kubernetes/scheduler-policy-config.json

  # Without NODE_IP the substitution below would turn the extender URL host
  # into an empty string, so refuse before anything is modified.
  if [[ -z "${NODE_IP:-}" ]]; then
    public::common::log "NODE_IP is not set; refusing to rewrite the scheduler policy config." >&2
    return 1
  fi

  mkdir -p "$backup_dir"
  public::deployer::_backup-file "$scheduler_manifest" kube-scheduler yaml
  # The policy file may not exist yet; back it up only when it does.
  if [[ -f "$policy_file" ]]; then
    public::deployer::_backup-file "$policy_file" scheduler-policy-config json
  fi

  public::common::log "Configure shceduler extender"
  cp -f /schd-extender/scheduler-policy-config.json "$policy_file"
  # Dots are escaped so only the literal address 127.0.0.1 is replaced.
  sed -i 's/127\.0\.0\.1/'"${NODE_IP}"'/g' "$policy_file"

  # NOTE(review): the text inserted by the sed "a" commands below is
  # whitespace-significant YAML. The expressions are kept exactly as found;
  # confirm the resulting indentation against a real kube-scheduler.yaml.

  # Insert the revision annotation, or refresh it when it already exists.
  if ! grep -q 'deployment.kubernetes.io/revision' "$scheduler_manifest"; then
    sed -i '/scheduler.alpha.kubernetes.io\/critical-pod/a \ deployment.kubernetes.io/revision: "'"${TIMESTAMP}"'"' "$scheduler_manifest"
  else
    # sed -i '/deployment.kubernetes.io\/revision/d' $dir/kube-scheduler.yaml
    sed -i 's#deployment.kubernetes.io/revision:.*#deployment.kubernetes.io/revision: "'"${TIMESTAMP}"'"#' "$scheduler_manifest"
  fi

  # Add the --policy-config-file flag after the "- kube-scheduler" line.
  if ! grep -q 'policy-config-file=/etc/kubernetes/scheduler-policy-config.json' "$scheduler_manifest"; then
    sed -i "/- kube-scheduler/a\ \ \ \ - --policy-config-file=/etc/kubernetes/scheduler-policy-config.json" "$scheduler_manifest"
  else
    public::common::log "Skip the kube-scheduler config, because it's already configured extender."
  fi

  # add scheduler config policy volumeMounts
  if ! grep -q 'mountPath: /etc/kubernetes/scheduler-policy-config.json' "$scheduler_manifest"; then
    sed -i "/ volumeMounts:/a\ \ \ \ - mountPath: /etc/kubernetes/scheduler-policy-config.json\n name: scheduler-policy-config\n readOnly: true" "$scheduler_manifest"
  else
    public::common::log "Skip the scheduler-policy-config mountPath, because it's already configured extender."
  fi

  # add scheduler config policy volumes
  if ! grep -q 'path: /etc/kubernetes/scheduler-policy-config.json' "$scheduler_manifest"; then
    sed -i "/ volumes:/a \ - hostPath:\n path: /etc/kubernetes/scheduler-policy-config.json\n type: FileOrCreate\n name: scheduler-policy-config" "$scheduler_manifest"
  else
    public::common::log "Skip the scheduler-policy-config volumes, because it's already configured extender."
  fi
}
# Entry point: apply the scheduler policy configuration, then create the
# /ready marker file. Unlike a long-running service it returns afterwards
# (the keep-alive loop is intentionally left commented out).
main() {
  public::deployer::sche-policy-config

  touch /ready
  #while sleep 3600; do :; done
}

main

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# When /k8s-host exists: replace <host>/usr/local/k8s-schd-extender with a
# fresh copy of /schd-extender, make it executable, run
# install-sched-extender-on-host.sh chrooted into /k8s-host (no arguments are
# forwarded), then loop on one-hour sleeps. When /k8s-host does not exist
# nothing is done.
set -ex

host_root=/k8s-host
install_dir=/usr/local/k8s-schd-extender

if [[ -d "$host_root" ]]; then
  rm -rf "${host_root}${install_dir}"
  mkdir -p "${host_root}${install_dir}"
  cp -r /schd-extender/* "${host_root}${install_dir}"
  chmod -R +x "${host_root}${install_dir}/"
  # Inside the chroot the install directory is addressed without the prefix.
  chroot "$host_root" "${install_dir}/install-sched-extender-on-host.sh"
  while sleep 3600; do
    :
  done
fi

View File

@@ -0,0 +1,20 @@
{
"kind": "Policy",
"apiVersion": "v1",
"extenders": [
{
"urlPrefix": "http://127.0.0.1:32766/gpushare-scheduler",
"filterVerb": "filter",
"bindVerb": "bind",
"enableHttps": false,
"nodeCacheCapable": true,
"managedResources": [
{
"name": "aliyun.com/gpu-mem",
"ignoredByScheduler": false
}
],
"ignorable": false
}
]
}