Compare commits
	
		
			9 Commits
		
	
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 4323d3ebf3 | |
|  | ef9e32ae72 | |
|  | 3d33eae029 | |
|  | ce15825a91 | |
|  | 5475e022df | |
|  | aac0ddad01 | |
|  | f707547267 | |
|  | 49fbd0c8c2 | |
|  | b4dcf6632a | 
|  | @ -1,16 +1,8 @@ | ||||||
| matrix: |  | ||||||
|   EPOCHS: |  | ||||||
|     - 20 |  | ||||||
|     - 30 |  | ||||||
|   OPTIMIZER:  |  | ||||||
|     - adam |  | ||||||
|     - SGD |  | ||||||
| 
 |  | ||||||
| steps: | steps: | ||||||
|   "train": |   "train": | ||||||
|     image: nvcr.io/nvidia/tensorflow:23.10-tf2-py3 |     image: nvcr.io/nvidia/tensorflow:23.10-tf2-py3 | ||||||
|     commands: |     commands: | ||||||
|       - echo "starting python script" |       - echo "starting python scrip sd " | ||||||
|       - python run.py |       - python run.py | ||||||
|   "compress and upload": |   "compress and upload": | ||||||
|     image: alpine:3 |     image: alpine:3 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,4 @@ | ||||||
|  | # Long Running Trainings | ||||||
|  | 
 | ||||||
|  | This repository contains an example pipeline for long running training tasks.  | ||||||
|  | Detailed information can be found at the official [Sandbox Documentation](https://docs.sandbox.iuk.hdm-stuttgart.de/sandbox/training.html). | ||||||
							
								
								
									
										10
									
								
								run.py
								
								
								
								
							
							
						
						
									
										10
									
								
								run.py
								
								
								
								
							|  | @ -8,10 +8,6 @@ import os | ||||||
| from keras import backend as K | from keras import backend as K | ||||||
| K.clear_session() | K.clear_session() | ||||||
| 
 | 
 | ||||||
| EPOCHS = int(os.getenv("EPOCHS", default = 10)) |  | ||||||
| OPTIMIZER = os.getenv("OPTIMIZER", default = "SGD") |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| gpus = tf.config.experimental.list_physical_devices('GPU') | gpus = tf.config.experimental.list_physical_devices('GPU') | ||||||
| # if gpus: | # if gpus: | ||||||
| #   try: | #   try: | ||||||
|  | @ -21,7 +17,7 @@ gpus = tf.config.experimental.list_physical_devices('GPU') | ||||||
| #     os.exit(1) | #     os.exit(1) | ||||||
| 
 | 
 | ||||||
| print(tf.config.experimental.list_physical_devices()) | print(tf.config.experimental.list_physical_devices()) | ||||||
| 
 | print("test") | ||||||
| print(tf.__version__) | print(tf.__version__) | ||||||
| 
 | 
 | ||||||
| print(tf.test.is_built_with_cuda()) | print(tf.test.is_built_with_cuda()) | ||||||
|  | @ -72,10 +68,10 @@ with tf.device('/GPU:0'): | ||||||
|             keras.layers.Dense(10, activation='sigmoid')     |             keras.layers.Dense(10, activation='sigmoid')     | ||||||
|         ]) |         ]) | ||||||
|   # g |   # g | ||||||
|     model.compile(optimizer=OPTIMIZER, |     model.compile(optimizer='SGD', | ||||||
|                   loss='categorical_crossentropy', |                   loss='categorical_crossentropy', | ||||||
|                   metrics=['accuracy']) |                   metrics=['accuracy']) | ||||||
|     model.fit(X_train_scaled, y_train_categorical, epochs=EPOCHS) |     model.fit(X_train_scaled, y_train_categorical, epochs=25) | ||||||
|     model.save('mymodel.keras') |     model.save('mymodel.keras') | ||||||
|      |      | ||||||
|     print("finished training") |     print("finished training") | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue