-
Notifications
You must be signed in to change notification settings - Fork 344
Tensorflow Mnist for Gitlab CI
This is an introductory example of how to create a CI/CD pipeline with DVC-CML in GitLab CI/CD Pipelines.
1. Install DVC on your computer if you haven't done so already
git clone your-repo-url
mkdir models metrics code
touch models/.gitkeep
touch metrics/.gitkeep
echo -e "tensorflow\nwget" >> requirements.txt
pip install tensorflow wget
code/mnist.py
import os
import sys
import gzip
import shutil
import numpy as np
import wget
def download(uri, path):
    """Download ``uri`` with the ``wget`` package.

    ``path`` is handed to ``wget.download`` as its output location; the
    script below passes a folder here.  The value returned by
    ``wget.download`` is discarded, so this function returns ``None``.
    """
    wget.download(uri, out=path)
def unzip(path):
    """Decompress the gzip archive at ``path`` onto disk.

    The output file name is ``path`` with every ``'.gz'`` occurrence removed
    (the clean-up code in the ``__main__`` section derives the name the same
    way).  Both files are closed even when decompression fails, and the data
    is streamed in chunks instead of being loaded into memory at once.
    """
    target = path.replace('.gz', '')

    if target == path:
        # No '.gz' in the name: the output overwrites the input, so the
        # archive has to be read completely before it is truncated.
        with gzip.open(path, 'rb') as compressed:
            payload = compressed.read()
        with open(target, 'wb') as plain:
            plain.write(payload)
        return

    with gzip.open(path, 'rb') as compressed, open(target, 'wb') as plain:
        shutil.copyfileobj(compressed, plain)
def get_images(imgf, n, pixels=28 * 28, header_size=16):
    """Read ``n`` images from the binary file ``imgf``.

    The first ``header_size`` bytes are skipped, then each image is read as
    ``pixels`` consecutive bytes.

    Returns a list of ``n`` lists, each holding ``pixels`` integers (0-255).
    Raises ``ValueError`` if the file ends before ``n`` images were read.
    """
    images = []
    with open(imgf, "rb") as stream:
        stream.read(header_size)  # skip the file header
        for index in range(n):
            # One read per image instead of one read per byte.
            raw = stream.read(pixels)
            if len(raw) != pixels:
                raise ValueError(
                    "%s is truncated: image %d of %d is incomplete"
                    % (imgf, index + 1, n))
            images.append(list(raw))
    return images
def get_labels(labelf, n, header_size=8):
    """Read ``n`` labels from the binary file ``labelf``.

    The first ``header_size`` bytes are skipped; every following byte is one
    label.

    Returns a list of ``n`` integers.
    Raises ``ValueError`` if the file holds fewer than ``n`` labels.
    """
    with open(labelf, "rb") as stream:
        stream.read(header_size)  # skip the file header
        raw = stream.read(n)
    if len(raw) != n:
        raise ValueError(
            "%s is truncated: expected %d labels, found %d"
            % (labelf, n, len(raw)))
    return list(raw)
def output_csv(folder, images, labels, prefix):
    """Write ``images`` and ``labels`` to ``<folder>/mnist_<prefix>.csv``.

    Each row is the label followed by the pixel values of the matching
    image, comma separated.  ``folder`` (including missing parents) is
    created when it does not exist.

    Raises ``ValueError`` before anything is written if there are fewer
    labels than images, so no partial file is left behind.
    """
    if len(labels) < len(images):
        raise ValueError(
            "got %d labels for %d images" % (len(labels), len(images)))

    os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, "mnist_%s.csv" % prefix)
    # The context manager closes the file even if a row fails to serialise.
    with open(target, "w") as out:
        out.writelines(
            ",".join(str(value) for value in [label, *image]) + "\n"
            for label, image in zip(labels, images)
        )
def process(folder, imgf, labelf, prefix, n):
    """Convert one image/label file pair inside ``folder`` into a CSV.

    Reads ``n`` images from ``folder/imgf`` and ``n`` labels from
    ``folder/labelf`` and hands them to ``output_csv`` together with
    ``folder`` and ``prefix``.
    """
    image_path = os.path.join(folder, imgf)
    label_path = os.path.join(folder, labelf)
    output_csv(
        folder,
        get_images(image_path, n),
        get_labels(label_path, n),
        prefix,
    )
def read_csv(path, num_classes=10):
    """Load a CSV whose rows are ``label,pixel,pixel,...``.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of failing on ``int('')``.

    Returns a ``(labels, images)`` tuple, labels first:
    - ``labels``: one one-hot vector of length ``num_classes`` per row;
    - ``images``: one ``float32`` vector per row, pixel values scaled by
      ``1/255``.
    """
    labels = []
    imgs = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            label, *pixels = line.split(',')

            label_one_hot = np.zeros(num_classes)
            label_one_hot[int(label)] = 1
            labels.append(label_one_hot)

            img = np.array(pixels).astype(np.float32)
            imgs.append(np.multiply(img, 1.0 / 255.0))
    return (np.asarray(labels), np.asarray(imgs))
class DataSet(object):
    """In-memory dataset that hands out consecutive mini-batches.

    ``images`` and ``labels`` are indexed along their first axis with the
    same permutation, so row ``i`` of one belongs to row ``i`` of the other.
    """

    def __init__(self, images, labels):
        self.num_examples = images.shape[0]
        self.images = images
        self.labels = labels
        # Number of times the data has been reshuffled by next_batch().
        self.epochs_completed = 0
        # Position just past the most recently returned batch.
        self.index_in_epoch = 0

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` examples as ``(images, labels)``.

        When the remaining examples of the current epoch cannot fill a whole
        batch they are dropped: the data is reshuffled with
        ``np.random.shuffle`` and the batch is taken from the start of the
        new epoch.

        Raises ``ValueError`` if ``batch_size`` exceeds the number of
        examples.  The check runs before any state is touched, so a rejected
        call leaves the dataset unchanged and is not disabled by ``-O``.
        """
        if batch_size > self.num_examples:
            raise ValueError(
                "batch_size %d exceeds the %d available examples"
                % (batch_size, self.num_examples))

        start = self.index_in_epoch
        self.index_in_epoch += batch_size
        if self.index_in_epoch > self.num_examples:
            self.epochs_completed += 1
            # Shuffle images and labels with the same permutation.
            perm = np.arange(self.num_examples)
            np.random.shuffle(perm)
            self.images = self.images[perm]
            self.labels = self.labels[perm]
            # Start next epoch
            start = 0
            self.index_in_epoch = batch_size
        end = self.index_in_epoch
        return self.images[start:end], self.labels[start:end]
if __name__ == "__main__":
    # Usage: python mnist.py <out_folder>
    # Downloads the four MNIST archives into <out_folder>, converts them to
    # mnist_train.csv / mnist_test.csv and removes the intermediate files.
    if len(sys.argv) < 2:
        print('folder is missing. Run command with folder path.')
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to be defined.
        sys.exit(1)

    out_folder = sys.argv[1]
    if os.path.exists(out_folder):
        # Refuse to mix freshly prepared data with leftovers of an old run.
        print('folder ' + out_folder + ' already exists! Delete it with all its content in order to prepare it')
        sys.exit(1)
    os.makedirs(out_folder)

    SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
    files = ['train-images-idx3-ubyte.gz',
             'train-labels-idx1-ubyte.gz',
             't10k-images-idx3-ubyte.gz',
             't10k-labels-idx1-ubyte.gz']

    for filename in files:
        download(SOURCE_URL + filename, out_folder)
        unzip(os.path.join(out_folder, filename))

    process(out_folder, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", 'train', 60000)
    process(out_folder, "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte", 'test', 10000)

    # Only the generated CSV files are kept: drop each archive and its
    # decompressed counterpart.
    for filename in files:
        path = os.path.join(out_folder, filename)
        os.remove(path)
        os.remove(path.replace('.gz', ''))
code/train.py
import os
import json
import time
import tensorflow.compat.v1 as tf
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
tf.disable_v2_behavior()
import mnist
# Trains a single-layer softmax classifier on data/mnist_train.csv, saves the
# model under models/mnist and writes the run parameters to
# metrics/train.json.  All paths are resolved relative to this file.
dirname = os.path.dirname(__file__)

# read_csv returns (labels, images), in that order.
train_labels, train_images = mnist.read_csv(os.path.join(dirname, '../data/mnist_train.csv'))
DATASET = mnist.DataSet(train_images, train_labels)

OUT = os.path.join(dirname, "../models/mnist")

batch_size = 128
num_steps = 1800
learning_rate = 0.01

start = time.time()

# input
x = tf.placeholder(tf.float32, [None, 784], "x")
y_ = tf.placeholder(tf.float32, [None, 10], "y")

# weight
W = tf.Variable(tf.zeros([784, 10]))

# bias
b = tf.Variable(tf.zeros([10]))

# test_data * W + b
y = tf.matmul(x, W) + b
sm = tf.nn.softmax(y, name="softmax")

# cross entropy (loss function)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_), name="loss")

# train step
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# evaluating the model
# The names "x", "y", "softmax" and "accuracy" are looked up again by
# eval.py through get_tensor_by_name, so they must not change.
correct_prediction = tf.equal(tf.argmax(sm, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)

    # training
    for step in range(num_steps):
        batch_data, batch_labels = DATASET.next_batch(batch_size)
        feed_dict = {x: batch_data, y_: batch_labels}
        session.run([loss, train_step, accuracy], feed_dict=feed_dict)

    saver.save(session, OUT)

with open(os.path.join(dirname, '../metrics/train.json'), 'w') as outfile:
    json.dump({
        "batch_size": batch_size,
        "num_steps": num_steps,
        "learning_rate": learning_rate,
        # time.time() already returns seconds; the previous "/ 1000"
        # reported the duration in kiloseconds.
        "took": time.time() - start}, outfile)
code/eval.py
import os
import json
import tensorflow.compat.v1 as tf
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
tf.disable_v2_behavior()
import mnist
# Restores the model written by train.py, evaluates it on
# data/mnist_test.csv and stores the accuracy in metrics/eval.json.
# All paths are resolved relative to this file.
dirname = os.path.dirname(__file__)

# read_csv returns (labels, images), in that order.
LABELS, IMAGES = mnist.read_csv(os.path.join(dirname, '../data/mnist_test.csv'))
META = os.path.join(dirname, '../models/mnist.meta')
MODELS = os.path.join(dirname, '../models/')

# NOTE(review): this op is never run in this script; it is kept so the
# default graph is built exactly as before.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Rebuild the saved graph, then load the latest checkpoint's weights.
    saver = tf.train.import_meta_graph(META)
    saver.restore(sess, tf.train.latest_checkpoint(MODELS))

    # Fetch the tensors by the names they were given in train.py.
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name("x:0")
    y = graph.get_tensor_by_name("y:0")
    softmax = graph.get_tensor_by_name("softmax:0")
    accuracy = graph.get_tensor_by_name("accuracy:0")

    _, accuracy_value = sess.run(
        [softmax, accuracy],
        feed_dict={x: IMAGES, y: LABELS},
    )

metrics_path = os.path.join(dirname, '../metrics/eval.json')
with open(metrics_path, 'w') as outfile:
    # .item() turns the numpy scalar into a plain Python float for JSON.
    json.dump({"accuracy": accuracy_value.item()}, outfile)
9. Setup dvc in your project, you will need to add a dvc remote storage
dvc init
dvc remote add -d myremote s3://your-s3-bucket/dvc-mnist-example
python code/mnist.py data
If everything has gone fine, you should have two CSV files inside data:
- mnist_train.csv
- mnist_test.csv
containing 60000 and 10000 rows respectively (each row is a label followed by the pixel values of one 28x28 image).
Now track the data with DVC by running:
dvc add data
11. Let's create your dvc pipeline running the following commands:
dvc run --no-exec \
-f train.dvc \
-d code/train.py \
-d data/mnist_train.csv \
-o models \
-M metrics/train.json \
python code/train.py
dvc run --no-exec \
-f eval.dvc \
-d code/eval.py \
-d data/mnist_test.csv \
-d models \
-M metrics/eval.json \
python code/eval.py
.gitlab-ci.yml
# The pipeline has a single stage.
stages:
  - dvc_action_run

# The only job; it runs in the DVC-CML image.
dvc:
  stage: dvc_action_run
  image: dvcorg/dvc-cml:latest
  variables:
    # NOTE(review): presumably the DVC stage file that dvc_cml_run
    # reproduces - confirm against the DVC-CML documentation.
    repro_targets: 'eval.dvc'
  script:
    # Install pip, then the project's Python dependencies.
    - apt-get update && apt-get install -y python-pip && pip install --upgrade pip
    - pip install -r requirements.txt
    - dvc_cml_run
- Log in to GitLab.
- In the upper-right corner, click your avatar and select Settings.
- On the User Settings menu, select Access Tokens.
- Use repo_token as name and optional expiry date for the token.
- Choose api, read repository and write repository.
- Click the Create personal access token button.
12.b. Set up your AWS credentials and your repo_token as masked environment variables in your repo.
git add --all
git commit -m "first commit"
dvc push
git push
Congratulations! 🎉 You have created your first CD ML pipeline with DVC-CML. Let's check what is going to happen.
Runner could be running in your own servers with GPUs if needed! 😃 Check how to run your own runners
On every push/MR, dvc repro will be executed if, and only if, your dvc pipeline has changed. This lets you treat every branch or commit as a new experiment when you change your pipeline, or as a normal git push when the pipeline does not change.
If everything went fine you will see that the job ran properly, and you will see another commit automatically generated with the comment dvc repro [skip ci]
DVC-CML ran the dvc repro for you (doing the train and eval stages) and then pushed your changes into git and dvc! 🚀