Compare commits
9 Commits
Author | SHA1 | Date |
---|---|---|
Malte Grosse | 4323d3ebf3 | |
Malte Grosse | ef9e32ae72 | |
Malte Grosse | 3d33eae029 | |
Malte Grosse | ce15825a91 | |
Cornelius Specht | 5475e022df | |
Cornelius Specht | aac0ddad01 | |
Malte Grosse | f707547267 | |
Malte Grosse | 49fbd0c8c2 | |
Malte Grosse | b4dcf6632a |
|
@ -1,32 +1,13 @@
|
||||||
platform: linux/amd64
|
steps:
|
||||||
pipeline:
|
"train":
|
||||||
first-job:
|
image: nvcr.io/nvidia/tensorflow:23.10-tf2-py3
|
||||||
image: busybox
|
|
||||||
commands:
|
commands:
|
||||||
- echo "ci working................. "
|
- echo "starting python scrip sd "
|
||||||
cpu:
|
- python run.py
|
||||||
image: progrium/stress:latest
|
"compress and upload":
|
||||||
commands: /usr/bin/stress --cpu 20 --io 1 --vm 2 --vm-bytes 128M --timeout 90s
|
image: alpine:3
|
||||||
nvidia-test:
|
|
||||||
image: nvidia/cuda:11.6.2-base-ubuntu20.04
|
|
||||||
commands:
|
|
||||||
- nvidia-smi
|
|
||||||
environment:
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=all
|
|
||||||
gpu:
|
|
||||||
image: oguzpastirmaci/gpu-burn:latest
|
|
||||||
environment:
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=all
|
|
||||||
commands:
|
commands:
|
||||||
- cd /app
|
- apk --no-cache add zip curl
|
||||||
- ./gpu_burn 120
|
- zip mymodel.zip mymodel.keras
|
||||||
- echo "burned. done"
|
- curl -F fileUpload=@mymodel.zip https://share.storage.sandbox.iuk.hdm-stuttgart.de/upload
|
||||||
|
|
||||||
# 2nd:
|
|
||||||
# image: oguzpastirmaci/gpu-burn:latest
|
|
||||||
# environment:
|
|
||||||
# - NVIDIA_VISIBLE_DEVICES=all
|
|
||||||
# commands:
|
|
||||||
# - ./gpu_burn 60
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Long-Running Training
|
||||||
|
|
||||||
|
This repository contains an example pipeline for long-running training tasks.
|
||||||
|
Detailed information can be found at the official [Sandbox Documentation](https://docs.sandbox.iuk.hdm-stuttgart.de/sandbox/training.html).
|
|
@ -0,0 +1,89 @@
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
import requests
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
# Version Information
|
||||||
|
# tensorflow 2.2.0 , Cudnn7.6.5 and Cuda 10.1 , python 3.8
|
||||||
|
from keras import backend as K
|
||||||
|
# Clear Keras' global state before anything is built.
K.clear_session()

gpus = tf.config.experimental.list_physical_devices('GPU')
# Optional: cap the memory TensorFlow may claim on the first GPU.
# if gpus:
#     try:
#         tf.config.experimental.set_virtual_device_configuration(
#             gpus[0],
#             [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6024)])
#     except RuntimeError as e:
#         print(e)
#         raise SystemExit(1)  # os.exit() does not exist; exit via SystemExit

# Diagnostics for the CI log: visible devices, TensorFlow version, CUDA build.
print(tf.config.experimental.list_physical_devices())
print("test")
print(tf.__version__)

print(tf.test.is_built_with_cuda())

# Load the CIFAR-10 train/test split once; a second identical call would only
# repeat the work and rebind the same names.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

print(X_train.shape, y_train.shape)

# Human-readable class names, indexed by the integer label.
classes = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

# Sanity check: print the class name of the fourth training sample.
print(classes[y_train[3][0]])
|
||||||
|
|
||||||
|
print("pre processing: scale images")

# Divide both image sets by 255 (presumably mapping 8-bit pixel values into
# the [0, 1] range -- verify against the dataset dtype).
X_train_scaled, X_test_scaled = X_train / 255, X_test / 255

# One-hot encode the integer labels into 10 float32 columns per sample.
y_train_categorical, y_test_categorical = (
    keras.utils.to_categorical(labels, num_classes=10, dtype='float32')
    for labels in (y_train, y_test)
)

print("model build")
|
||||||
|
|
||||||
|
|
||||||
|
def get_model(input_shape=(32, 32, 3), num_classes=10):
    """Build and compile the fully connected image classifier.

    Args:
        input_shape: Shape of a single input image. Defaults to
            ``(32, 32, 3)``, the value this script has always used.
        num_classes: Number of units in the output layer. Defaults to 10.

    Returns:
        A compiled ``keras.Sequential`` model using the SGD optimizer,
        categorical cross-entropy loss and an accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # softmax produces a probability distribution over the classes, which
        # is what categorical cross-entropy expects for one-hot targets;
        # independent sigmoid outputs do not sum to 1.
        keras.layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
|
||||||
|
|
||||||
|
# Build and train the model with operations placed on the first GPU.
with tf.device('/GPU:0'):
    # Reuse get_model() instead of repeating the layer stack and compile call,
    # so the architecture is defined in exactly one place.
    model = get_model()
    model.fit(X_train_scaled, y_train_categorical, epochs=25)
    # Save the trained model under the file name the upload code reads back.
    model.save('mymodel.keras')
|
||||||
|
|
||||||
|
print("finished training")

myurl = 'https://share.storage.sandbox.iuk.hdm-stuttgart.de/upload'

print("uploading file")
# Open the saved model in a context manager so the file handle is closed once
# the multipart upload has been sent, instead of leaking until interpreter exit.
with open('mymodel.keras', 'rb') as model_file:
    files = {
        'fileUpload': ('mymodel.keras', model_file, 'application/octet-stream')
    }
    response = requests.post(myurl, files=files)

# Log the HTTP status and the server's reply body for the CI output.
print(response, response.text)

print("done")
|
Loading…
Reference in New Issue