synchronization
29	gpushare-scheduler-extender/samples/1.yaml	Normal file
@@ -0,0 +1,29 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-1
  labels:
    app: binpack-1

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-1

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-1

    spec:
      containers:
      - name: binpack-1
        image: cheyang/gpu-player:v2
        resources:
          limits:
            # GiB
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/2.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-2
  labels:
    app: binpack-2

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-2

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-2

    spec:
      containers:
      - name: binpack-2
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/3.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-3
  labels:
    app: binpack-3

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-3

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-3

    spec:
      containers:
      - name: binpack-3
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 2
28	gpushare-scheduler-extender/samples/4.yaml	Normal file
@@ -0,0 +1,28 @@
apiVersion: apps/v1
kind: Deployment

metadata:
  name: binpack-4
  labels:
    app: binpack-4

spec:
  replicas: 1

  selector: # define how the deployment finds the pods it manages
    matchLabels:
      app: binpack-4

  template: # define the pod specifications
    metadata:
      labels:
        app: binpack-4

    spec:
      containers:
      - name: binpack-4
        image: cheyang/gpu-player:v2
        resources:
          limits:
            aliyun.com/gpu-mem: 16276
9	gpushare-scheduler-extender/samples/docker/Dockerfile	Normal file
@@ -0,0 +1,9 @@
FROM cheyang/gpu-player

COPY main.py /app/main.py

COPY run.sh /app/run.sh

RUN chmod u+x /app/run.sh

CMD ["/app/run.sh"]
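For reference, a minimal sketch of building the sample image from this Dockerfile. The `v2` tag is an assumption taken from the image name used in the Deployment manifests above; the commit itself does not say how the image is tagged or pushed.

    # Build the sample image from samples/docker/ (tag assumed to match the manifests)
    cd gpushare-scheduler-extender/samples/docker
    docker build -t cheyang/gpu-player:v2 .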
40	gpushare-scheduler-extender/samples/docker/main.py	Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import tensorflow as tf

FLAGS = None


def train(fraction=1.0):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)
    # Create a session that caps GPU memory usage at the given fraction.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = fraction
    sess = tf.Session(config=config)
    # Run the op in a loop so the process keeps holding its GPU memory share.
    while True:
        sess.run(c)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--total', type=float, default=1000,
                        help='Total GPU memory.')
    parser.add_argument('--allocated', type=float, default=1000,
                        help='Allocated GPU memory.')
    FLAGS, unparsed = parser.parse_known_args()
    # fraction = FLAGS.allocated / FLAGS.total * 0.85
    # Leave headroom: only ask TensorFlow for 70% of the container's share.
    fraction = round(FLAGS.allocated * 0.7 / FLAGS.total, 1)

    print(fraction)
    train(fraction)
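As a worked example of the fraction computed above (the numbers are illustrative, not from the commit): with a 15 GiB device and 2 GiB allocated to the container, the script requests round(2 * 0.7 / 15, 1) = 0.1 of the GPU's memory.

    # Hypothetical manual invocation; in the cluster the gpushare device plugin
    # supplies these values via the environment variables used in run.sh below.
    python main.py --total=15 --allocated=2   # prints 0.1, then runs the matmul loop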
6	gpushare-scheduler-extender/samples/docker/run.sh	Normal file
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

echo ALIYUN_COM_GPU_MEM_DEV=$ALIYUN_COM_GPU_MEM_DEV
echo ALIYUN_COM_GPU_MEM_CONTAINER=$ALIYUN_COM_GPU_MEM_CONTAINER

python /app/main.py --total=$ALIYUN_COM_GPU_MEM_DEV --allocated=$ALIYUN_COM_GPU_MEM_CONTAINER
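A minimal sketch of exercising these samples on a cluster that already runs the gpushare scheduler extender and device plugin; the cluster setup and the available per-GPU memory are assumptions, not part of this commit.

    # Deploy the sample workloads
    kubectl apply -f gpushare-scheduler-extender/samples/1.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/2.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/3.yaml
    kubectl apply -f gpushare-scheduler-extender/samples/4.yaml

    # Watch how the pods are placed; binpack-4 requests far more aliyun.com/gpu-mem
    # than the others, so it may stay Pending if no single GPU can satisfy it.
    kubectl get pods -o wide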