Skip to content

Commit

Permalink
added ec2 benchmarking script
Browse files Browse the repository at this point in the history
  • Loading branch information
Danny Bickson committed Nov 3, 2013
1 parent 425f084 commit cfef5c9
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 3 deletions.
77 changes: 77 additions & 0 deletions scripts/ec2/benchmark_ec2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is an example of benchmarking GraphLab on EC2
# in order to test how it scales with the number of slaves
# (C) GraphLab Inc. 2013
# Please send any questions or bug reports to [email protected]
# Written by Danny Bickson

# Benchmark configuration.
# NOTE: the benchmark loops below iterate with `seq 0 1 N`, which is inclusive
# on both ends, so cluster sizes 0..MAX_SLAVES are tested and each size is run
# MAX_RETRY + 1 times.
MAX_SLAVES=3 # configure the maximum number of slaves
MAX_RETRY=3 # configure the number of experiment repeats
PAGERANK=1 # if 1, runs pagerank
SVD=1 # if 1, runs svd
ALS=1 # if 1, runs als

# Destroy any "hpctest" cluster left over from a previous run. The "y" fed on
# stdin presumably answers the destroy confirmation prompt.
printf 'y\n' | ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 destroy hpctest

# Launch a fresh cluster of cc2.8xlarge instances with MAX_SLAVES slaves.
./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 \
    -a hpc -s "$MAX_SLAVES" -t cc2.8xlarge \
    launch hpctest

# Bring GraphLab up to the latest version, recompile, and update the slaves.
./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 update hpctest

# Pagerank benchmark: for every slave count from 0 up to MAX_SLAVES
# (inclusive), run the pagerank demo MAX_RETRY + 1 times.
if [ "$PAGERANK" -eq 1 ]; then
    for num_slaves in $(seq 0 "$MAX_SLAVES"); do
        echo "Running Pagerank"
        for attempt in $(seq 0 "$MAX_RETRY"); do
            ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 \
                -s "$num_slaves" pagerank_demo hpctest
        done
    done
fi

# SVD benchmark: for every slave count from 0 up to MAX_SLAVES (inclusive),
# run the SVD demo MAX_RETRY + 1 times.
if [ "$SVD" -eq 1 ]; then
    for num_slaves in $(seq 0 "$MAX_SLAVES"); do
        echo "Running SVD"
        for attempt in $(seq 0 "$MAX_RETRY"); do
            ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 \
                -s "$num_slaves" svd_demo hpctest
        done
    done
fi

# run ALS benchmarks
# For every slave count from 0 up to MAX_SLAVES (inclusive), run the ALS demo
# MAX_RETRY + 1 times (seq includes both endpoints).
if [ "$ALS" -eq 1 ]; then
    for i in $(seq 0 1 "$MAX_SLAVES")
    do
        echo "Running ALS"
        for j in $(seq 0 1 "$MAX_RETRY")
        do
            # This call used to be wrapped in `if [ $first_time -eq 1 ]`, but
            # first_time is never assigned in this script: the test expanded
            # to `[ -eq 1 ]`, failed with "unary operator expected", and the
            # ALS demo was silently skipped on every iteration. The guard is
            # removed so ALS runs like the pagerank and SVD benchmarks.
            ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -s "$i" als_demo hpctest
        done
    done
fi

# Tear down the "hpctest" cluster now that all benchmarks are done. The "y" fed
# on stdin presumably answers the destroy confirmation prompt.
printf 'y\n' | ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 destroy hpctest
6 changes: 3 additions & 3 deletions scripts/ec2/gl_ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def main():
master = master_nodes[0].public_dns_name
print "Running ALS demo on master " + master + "..."
proxy_opt = ""
download_dataset = "rm -fR smallnetflix; mkdir smallnetflix; cd smallnetflix/; wget http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.validate.gz; #ugly, but we need to find a better place to host sample graphlab datasets wget http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.train_.gz; gunzip *.gz; mv smallnetflix_mm.train_ smallnetflix_mm.train #ugly, but wordpress does not allow .train file.. ;-( cd ..;"
download_dataset = "rm -fR smallnetflix; mkdir smallnetflix; cd smallnetflix/; wget -q http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.validate.gz; wget http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.train_.gz; gunzip *.gz; mv smallnetflix_mm.train_ smallnetflix_mm.train;cd ..;"
if opts.proxy_port != None:
proxy_opt = "-D " + opts.proxy_port
subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"
Expand All @@ -619,7 +619,7 @@ def main():
master = master_nodes[0].public_dns_name
print "Running pagerank demo on master " + master + "..."
proxy_opt = ""
download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;"
download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget -q http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;"
if opts.proxy_port != None:
proxy_opt = "-D " + opts.proxy_port
subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"
Expand All @@ -632,7 +632,7 @@ def main():
master = master_nodes[0].public_dns_name
print "Running SVD demo on master " + master + "..."
proxy_opt = ""
download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;"
download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget -q http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;"
if opts.proxy_port != None:
proxy_opt = "-D " + opts.proxy_port
subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"
Expand Down

0 comments on commit cfef5c9

Please sign in to comment.