Compare commits
9 Commits
Author | SHA1 | Date |
---|---|---|
Malte Grosse | 4323d3ebf3 | |
Malte Grosse | ef9e32ae72 | |
Malte Grosse | 3d33eae029 | |
Malte Grosse | ce15825a91 | |
Cornelius Specht | 5475e022df | |
Cornelius Specht | aac0ddad01 | |
Malte Grosse | f707547267 | |
Malte Grosse | 49fbd0c8c2 | |
Malte Grosse | b4dcf6632a |
|
@ -1,32 +1,13 @@
|
|||
platform: linux/amd64
|
||||
pipeline:
|
||||
first-job:
|
||||
image: busybox
|
||||
steps:
|
||||
"train":
|
||||
image: nvcr.io/nvidia/tensorflow:23.10-tf2-py3
|
||||
commands:
|
||||
- echo "ci working................. "
|
||||
cpu:
|
||||
image: progrium/stress:latest
|
||||
commands: /usr/bin/stress --cpu 20 --io 1 --vm 2 --vm-bytes 128M --timeout 90s
|
||||
nvidia-test:
|
||||
image: nvidia/cuda:11.6.2-base-ubuntu20.04
|
||||
- echo "starting python scrip sd "
|
||||
- python run.py
|
||||
"compress and upload":
|
||||
image: alpine:3
|
||||
commands:
|
||||
- nvidia-smi
|
||||
environment:
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
gpu:
|
||||
image: oguzpastirmaci/gpu-burn:latest
|
||||
environment:
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
commands:
|
||||
- cd /app
|
||||
- ./gpu_burn 120
|
||||
- echo "burned. done"
|
||||
|
||||
# 2nd:
|
||||
# image: oguzpastirmaci/gpu-burn:latest
|
||||
# environment:
|
||||
# - NVIDIA_VISIBLE_DEVICES=all
|
||||
# commands:
|
||||
# - ./gpu_burn 60
|
||||
|
||||
- apk --no-cache add zip curl
|
||||
- zip mymodel.zip mymodel.keras
|
||||
- curl -F fileUpload=@mymodel.zip https://share.storage.sandbox.iuk.hdm-stuttgart.de/upload
|
||||
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
# Long Running Trainings
|
||||
|
||||
This repository contains an example pipeline for long-running training tasks.
|
||||
Detailed information can be found in the official [Sandbox Documentation](https://docs.sandbox.iuk.hdm-stuttgart.de/sandbox/training.html).
|
|
@ -0,0 +1,89 @@
|
|||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
import requests
|
||||
import numpy as np
|
||||
import os
|
||||
# Version Information
|
||||
# tensorflow 2.2.0 , Cudnn7.6.5 and Cuda 10.1 , python 3.8
|
||||
from keras import backend as K
|
||||
# Reset global Keras state before anything is built.
K.clear_session()

# GPUs visible to TensorFlow; only referenced by the disabled memory cap below.
gpus = tf.config.experimental.list_physical_devices('GPU')
# Disabled: limit the first GPU through a virtual device (memory_limit=6024).
# NOTE(review): `os.exit` does not exist; use `sys.exit(1)` if this is re-enabled.
# if gpus:
#     try:
#         tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6024)])
#     except RuntimeError as e:
#         print(e)
#         os.exit(1)

# Log the devices, TensorFlow version and CUDA build flag so the job output
# shows what the training actually ran on.
visible_devices = tf.config.experimental.list_physical_devices()
print(visible_devices)
print("test")
tf_version = tf.__version__
print(tf_version)

built_with_cuda = tf.test.is_built_with_cuda()
print(built_with_cuda)
|
||||
|
||||
# Load the CIFAR-10 train/test split as (images, labels) pairs.
# A single call is sufficient: repeating it only re-reads the same data and
# rebinds the same four names.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
|
||||
|
||||
# Sanity check: show the array shapes of the training images and labels.
print(X_train.shape, y_train.shape)

# Human-readable class names, indexed by the integer label.
classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

# Each row of y_train holds one integer label, so [3][0] is the class index
# of the fourth training sample.
sample_class_index = y_train[3][0]
print(classes[sample_class_index])

print("pre processing: scale images")

# Scale pixel intensities by 1/255 (assumes 8-bit image data - TODO confirm).
X_train_scaled = X_train / 255
X_test_scaled = X_test / 255

# One-hot encode the integer labels into 10-column float32 arrays, the format
# the categorical cross-entropy loss is trained against.
y_train_categorical = keras.utils.to_categorical(y_train, num_classes=10, dtype='float32')
y_test_categorical = keras.utils.to_categorical(y_test, num_classes=10, dtype='float32')

print("model build")
|
||||
|
||||
|
||||
def get_model():
    """Build and compile the fully connected image classifier.

    The network flattens each 32x32x3 input and feeds it through two ReLU
    hidden layers (3000 and 1000 units) into a 10-unit output layer.

    Returns:
        A compiled ``keras.Sequential`` model using the SGD optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(32, 32, 3)),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # Softmax rather than sigmoid: categorical cross-entropy on one-hot
        # labels expects the outputs to form a probability distribution over
        # the classes, which independent sigmoid units do not provide.
        keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
|
||||
|
||||
# Build and train the model with operations placed on the first GPU.
with tf.device('/GPU:0'):
    # Reuse get_model() instead of repeating the layer stack and compile
    # settings inline, so the architecture is defined in exactly one place.
    model = get_model()
    model.fit(X_train_scaled, y_train_categorical, epochs=25)

# Persist the trained model; the upload step below reads this file.
model.save('mymodel.keras')

print("finished training")
|
||||
|
||||
# Endpoint that receives the trained model as a multipart form upload.
myurl = 'https://share.storage.sandbox.iuk.hdm-stuttgart.de/upload'

print("uploading file")
# Open the model inside a context manager so the file handle is closed once
# the request has finished, even if the POST raises.
with open('mymodel.keras', 'rb') as model_file:
    files = {
        'fileUpload': ('mymodel.keras', model_file, 'application/octet-stream')
    }
    response = requests.post(myurl, files=files)

# Show the HTTP status and the server's reply in the job log.
print(response, response.text)

print("done")
|
Loading…
Reference in New Issue