synchronization
29	gpushare-scheduler-extender/samples/1.yaml	Normal file
@@ -0,0 +1,29 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-1
  labels:
    app: binpack-1

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-1

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-1

    spec:
      containers:
      - name: binpack-1
        image: cheyang/gpu-player:v2
        resources:
          limits:
            # GiB
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/2.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-2
  labels:
    app: binpack-2

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-2

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-2

    spec:
      containers:
      - name: binpack-2
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/3.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-3
  labels:
    app: binpack-3

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-3

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-3

    spec:
      containers:
      - name: binpack-3
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/4.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-4
  labels:
    app: binpack-4

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-4

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-4

    spec:
      containers:
      - name: binpack-4
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 16276
9	gpushare-scheduler-extender/samples/docker/Dockerfile	Normal file
@@ -0,0 +1,9 @@
FROM cheyang/gpu-player

COPY main.py /app/main.py

COPY run.sh /app/run.sh

RUN chmod u+x /app/run.sh

CMD ["/app/run.sh"]
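For reference, a minimal sketch of building the sample image from this Dockerfile. The `v2` tag is an assumption taken from the image name used in the Deployment manifests above; the commit itself does not say how the image is tagged or pushed.

    # Build the sample image from samples/docker/ (tag assumed to match the manifests)
    cd gpushare-scheduler-extender/samples/docker
    docker build -t cheyang/gpu-player:v2 .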
40	gpushare-scheduler-extender/samples/docker/main.py	Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import tensorflow as tf

FLAGS = None


def train(fraction=1.0):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)
    # Create a session that caps GPU memory usage at the given fraction.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = fraction
    sess = tf.Session(config=config)
    # Run the op in a loop so the process keeps holding its GPU memory share.
    while True:
        sess.run(c)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--total', type=float, default=1000,
                        help='Total GPU memory.')
    parser.add_argument('--allocated', type=float, default=1000,
                        help='Allocated GPU memory.')
    FLAGS, unparsed = parser.parse_known_args()
    # fraction = FLAGS.allocated / FLAGS.total * 0.85
    # Leave headroom: only ask TensorFlow for 70% of the container's share.
    fraction = round(FLAGS.allocated * 0.7 / FLAGS.total, 1)

    print(fraction)
    train(fraction)
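As a worked example of the fraction computed above (the numbers are illustrative, not from the commit): with a 15 GiB device and 2 GiB allocated to the container, the script requests round(2 * 0.7 / 15, 1) = 0.1 of the GPU's memory.

    # Hypothetical manual invocation; in the cluster the gpushare device plugin
    # supplies these values via the environment variables used in run.sh below.
    python main.py --total=15 --allocated=2   # prints 0.1, then runs the matmul loop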
6	gpushare-scheduler-extender/samples/docker/run.sh	Normal file
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

echo ALIYUN_COM_GPU_MEM_DEV=$ALIYUN_COM_GPU_MEM_DEV
echo ALIYUN_COM_GPU_MEM_CONTAINER=$ALIYUN_COM_GPU_MEM_CONTAINER

python /app/main.py --total=$ALIYUN_COM_GPU_MEM_DEV --allocated=$ALIYUN_COM_GPU_MEM_CONTAINER
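A minimal sketch of exercising these samples on a cluster that already runs the gpushare scheduler extender and device plugin; the cluster setup and the available per-GPU memory are assumptions, not part of this commit.

    # Deploy the sample workloads
    kubectl apply -f gpushare-scheduler-extender/samples/1.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/2.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/3.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/4.yaml

    # Watch how the pods are placed; binpack-4 requests far more aliyun.com/gpu-mem
    # than the others, so it may stay Pending if no single GPU can satisfy it.
    kubectl get pods -o wide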