Skip to content

Commit

Permalink
Merge pull request #31 from OpenKBC/engineering_dev
Browse files Browse the repository at this point in the history
Engineering dev
  • Loading branch information
swiri021 authored Sep 28, 2021
2 parents 4e4bf80 + fa65a53 commit 4c6f8c1
Show file tree
Hide file tree
Showing 16 changed files with 307 additions and 84 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# s3 entire data
data/
*.pem
credentials

# MAC
.DS_Store
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_jupyterNotebook
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM jupyter/datascience-notebook
FROM jupyter/base-notebook

COPY notebook/installers/installer_Rpackage.R /installer_Rpackage.R
COPY notebook/installers/requirements.txt /requirements.txt
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,16 @@ docker-compose up # composing up
# Access jupyter notebook
http://localhost:8888/token_number
```

* Workflow container
```shell
# The workflow controller is not ready yet; please enter the container to run the snakemake workflow manually
docker exec -it container_name_of_pipelines bash
```

* AWS module
```shell
cd aws_module
sh aws_module.sh t2.medium
```

38 changes: 38 additions & 0 deletions aws_module/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## AWS module for running the project
* This module supports running the project code, pipelines and analysis by launching an AWS EC2 instance. Currently, it creates an EC2 instance in AWS using the GitHub code and the project S3 data (limit: below m5.4xlarge). The EC2 instance is launched with 200G of total volume initially (default).
* Please contact team members to obtain the credentials required to use this service.
* It contains AMI mapping JSON files for modifying the initial storage size.

### AWS AMI description
```
aws ec2 describe-images --image-ids ami-0f6304b1dde9413d6 #ubuntu 18.04 LTS with Docker
```

### Requirements on local PC
```
apt-get install awscli
apt-get install jq
```

### Usage on local PC
```
sh aws_module.sh t2.micro #with instance type(t2.micro for testing, maximum : m5.xlarge, m5.2xlarge)
```

### Requirements for docker
* This version has a problem with the Docker installation in AWS, so Docker needs to be installed manually
```
ssh -i MSplatform-key.pem ubuntu@IP_ADDRESS
```

### File information
* InstanceLaunch-Info: This file contains standard EC2 information you launched (IP addr, AZ and etc)
* InstanceVolume-Info: This file contains volume information you launched
* PublicIP: This file contains public IP address of EC2 you launched
* MSplatform-key.pem: This is the key for ssh'ing to EC2

### Services
```
http://yourEC2URL/ # Pipeline Controller
http://yourEC2URL:8888/?token= # Jupyter Notebook
```
25 changes: 25 additions & 0 deletions aws_module/aws_check_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

## Poll AWS until the given object is ready, then return.
## Usage: aws_check_status.sh <object-id> <ec2|ebs>
##   ec2: block until the instance state is "running"
##   ebs: block until the volume state is "available"
objectID=$1
objectState=dummy

if [ "$2" == "ec2" ]
then
    while [ "$objectState" != "running" ];do # EC2 running checking
        sleep 1
        objStatuses=$(aws ec2 describe-instance-status --instance-id "$objectID")
        # jq -r emits the raw string, so no manual double-quote stripping is needed
        objectState=$( jq -r '.InstanceStatuses | .[] | .InstanceState.Name' <<< "${objStatuses}" )
    done
elif [ "$2" == "ebs" ]
then
    while [ "$objectState" != "available" ];do # EBS available checking
        sleep 1
        objStatuses=$(aws ec2 describe-volumes --volume-ids "$objectID")
        objectState=$( jq -r '.Volumes | .[] | .State' <<< "${objStatuses}" )
    done
fi
87 changes: 87 additions & 0 deletions aws_module/aws_module.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/bin/bash

## Purpose of this temporary bash file is to handle the large data size for the project;
## default data, code and docker images will be launched on an EC2 instance.
## Let members know if you want to get auth for using AWS (credentials); personal use of EC2 is strictly prohibited. (It is monitored by admin)
## Parsing portion needs to be changed to aws tag work

securityGroupID=sg-08946d1b26a30d376 # default securityGroup for EC2(flask)
instanceType=$1 # t2.micro, m5.4xlarge for normal use
VolumeSize=100 # EBS volume size (GiB)
InstanceInfoFile=InstanceLaunch-Info # Instance launch information
VolumeInfoFile=InstanceVolume-Info # Volume create information
PublicIPFile=PublicIP # Public IP information
PemKeyName=MSplatform-key
storageType="" # set to "nvme" below for m5.* instance families

## Key gen for EC2. If a new key is needed, please use these command lines
#aws ec2 create-key-pair --key-name $PemKeyName --query 'KeyMaterial' --output text > MSplatform-key.pem
## Change auth for pem key
#chmod -R 400 MSplatform-key.pem

## EC2 Instance launch with modified block-device-mapping, ami-030cd17b75425e48d(plain ubuntu)
aws ec2 run-instances --image-id ami-0f6304b1dde9413d6 --block-device-mappings file://mapping_dockerAMI.json \
    --instance-type $instanceType --security-group-ids $securityGroupID --key-name $PemKeyName > $InstanceInfoFile

InstanceIDLine=$(cat $InstanceInfoFile | grep 'InstanceId' | xargs) # Instance ID from info, stripping line
IFS=': ' read -r -a array <<< "$InstanceIDLine" # Split string
element=${array[1]} # extract ID
InstanceID=$(echo ${element/,/} | xargs) # Last cleanup of Instance ID string
echo "Instance ID: $InstanceID"

AZLine=$(cat $InstanceInfoFile | grep 'AvailabilityZone' | xargs) # Get Availability Zone
IFS=': ' read -r -a array <<< "$AZLine" # Split string
element=${array[1]} # extract AZ
AvailabilityZone=$(echo ${element/,/} | xargs) # Last cleanup of AZ string
echo "Instance AZ: $AvailabilityZone"

#echo "Check ec2 status before create volume"
sh aws_check_status.sh $InstanceID ec2 # Block until the EC2 instance is "running"

ip_addr=$(aws ec2 describe-instances --instance-ids $InstanceID --query 'Reservations[0].Instances[0].PublicIpAddress') # get public IP for EC2
ip_addr="${ip_addr%\"}" # Remove double quotes from string
ip_addr="${ip_addr#\"}" # Remove double quotes from string
echo "PublicIP: $ip_addr"
echo "PublicIP: $ip_addr" > $PublicIPFile

## Volume create (same AZ as EC2)
aws ec2 create-volume --availability-zone $AvailabilityZone --volume-type gp2 --size $VolumeSize > $VolumeInfoFile

VolumeIDLine=$(cat $VolumeInfoFile | grep 'VolumeId' | xargs) # Volume ID from info, stripping line
IFS=': ' read -r -a array <<< "$VolumeIDLine" # Split string
element=${array[1]} # extract ID
VolumeID=$(echo ${element/,/} | xargs) # Last cleanup of Volume ID string
echo "Volume ID: $VolumeID"

## Volume attach
echo "Check ebs status before attach-volume"
sh aws_check_status.sh $VolumeID ebs # Block until the EBS volume is "available"
# BUGFIX: the original tested `[ "$2" == "m5."* ]` — plain [ ] performs no glob
# matching, and the instance type is argument $1 (never $2), so the NVMe branch
# could never run. A `case` glob on $instanceType handles both problems.
case "$instanceType" in
    m5.*)
        echo "NVME volume"
        aws ec2 attach-volume --volume-id $VolumeID --instance-id $InstanceID --device /dev/nvme1n1
        storageType=nvme
        ;;
    *)
        aws ec2 attach-volume --volume-id $VolumeID --instance-id $InstanceID --device /dev/sdf
        ;;
esac
sleep 30 # sleep while AWS is loading

## Running installer (pass the storage type so the installer knows which device to format/mount)
ssh -i MSplatform-key.pem ubuntu@$ip_addr "bash -s $storageType" < installer.sh

## Moving credentials to ec2 for s3 connection
scp -i MSplatform-key.pem credentials ubuntu@$ip_addr:/home/ubuntu/.aws

## S3 sync from S3 project bucket
ssh -i MSplatform-key.pem ubuntu@$ip_addr 'bash -s' < s3Sync.sh

#### Running something here

#### Copy to S3 for resultFiles

#### Terminating EC2 here
#aws ec2 stop-instances --instance-ids $InstanceID
#aws ec2 detach-volume --volume-id $VolumeID
#aws ec2 delete-volume --volume-id $VolumeID
#aws ec2 terminate-instances --instance-ids $InstanceID
7 changes: 7 additions & 0 deletions aws_module/docker_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

## Install docker-compose on the EC2 host and bring up the project containers.
set -e # abort on first failure so a broken download is not silently chmod'ed and run

# Install docker-compose (pinned to 1.29.2 for reproducibility)
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose

# docker-compose up for containers (detached so the ssh session can exit)
cd /home/ubuntu/MSProject/multiple_sclerosis_proj
sudo docker-compose -f docker-compose.AWS.yaml up --detach
22 changes: 22 additions & 0 deletions aws_module/installer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

## Prepare a freshly launched EC2 host: format and mount the attached EBS
## volume, clone the project code and prepare the AWS credentials directory.
## Usage (over ssh): bash -s [nvme]   # pass "nvme" on m5.* instance families
storageType=$1 # storage type: "nvme", or empty (unknown -> try both devices)

# Make usable disk
sudo mkdir -p /home/ubuntu/MSProject # -p: idempotent on re-runs
sudo apt update && sudo apt install -y git # -y: do not block on a prompt in a non-interactive ssh session
sudo apt-get install -y awscli # (duplicated "sudo sudo" removed)

# Format and mount the attached volume; the device name depends on the instance family
if [ "$storageType" == "nvme" ]
then
    sudo mkfs -t ext4 /dev/nvme1n1 # NVMe device on m5.* instances
    sudo mount /dev/nvme1n1 /home/ubuntu/MSProject
elif [ "$storageType" == "" ]
then
    # Storage type not given: attempt both device names, matching the original
    # best-effort behaviour (the command for the absent device fails harmlessly)
    sudo mkfs -t ext4 /dev/nvme1n1
    sudo mkfs -t ext4 /dev/xvdf
    sudo mount /dev/nvme1n1 /home/ubuntu/MSProject
    sudo mount /dev/xvdf /home/ubuntu/MSProject
else
    sudo mkfs -t ext4 /dev/xvdf # standard device (attached as /dev/sdf, appears as /dev/xvdf)
    sudo mount /dev/xvdf /home/ubuntu/MSProject
fi
#sudo chown -R ubuntu:ubuntu /home/ubuntu/MSProject

# Download all code
cd /home/ubuntu/MSProject # go to working directory
sudo git clone https://github.com/OpenKBC/multiple_sclerosis_proj.git # git clone the code

# For AWS S3 credentials
cd /home/ubuntu
sudo mkdir -p .aws
sudo chown -R ubuntu:ubuntu /home/ubuntu/.aws/
20 changes: 20 additions & 0 deletions aws_module/mapping_dockerAMI.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"DeviceName": "/dev/sda1",
"Ebs": {
"DeleteOnTermination": true,
"SnapshotId": "snap-0a2ad3b40ffe12e08",
"VolumeSize": 100,
"VolumeType": "gp2",
"Encrypted": false
}
},
{
"DeviceName": "/dev/sdb",
"VirtualName": "ephemeral0"
},
{
"DeviceName": "/dev/sdc",
"VirtualName": "ephemeral1"
}
]
20 changes: 20 additions & 0 deletions aws_module/mapping_plainAMI.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"DeviceName": "/dev/sda1",
"Ebs": {
"DeleteOnTermination": true,
"SnapshotId": "snap-04a6d5008d4a3d51f",
"VolumeSize": 100,
"VolumeType": "gp2",
"Encrypted": false
}
},
{
"DeviceName": "/dev/sdb",
"VirtualName": "ephemeral0"
},
{
"DeviceName": "/dev/sdc",
"VirtualName": "ephemeral1"
}
]
4 changes: 4 additions & 0 deletions aws_module/s3Sync.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

## Sync the project data from the S3 bucket into the project data directory.
cd /home/ubuntu/MSProject/multiple_sclerosis_proj # default project directory
sudo mkdir -p data/ # -p: do not fail if the directory already exists (re-runs)
sudo chown ubuntu:ubuntu data/
aws s3 sync s3://openkbc-ms-bucket/ data/ # pull bucket contents down to the EC2 host
26 changes: 26 additions & 0 deletions docker-compose.AWS.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: "3"
services:
  notebook: # Jupyter notebook service
    build:
      context: .
      dockerfile: Dockerfile_jupyterNotebook
    volumes:
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/notebook_lib:/home/jovyan/work/notebook_lib
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/notebook_utils:/home/jovyan/work/notebook_utils
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/notebook_archive:/home/jovyan/work/notebook_archive
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/resultFiles:/home/jovyan/work/resultFiles
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/home/jovyan/MainData
    ports:
      - 8888:8888
    # BUGFIX: container names may not contain ":" — a tag-style suffix like
    # "notebook-container:v1.0.0" is rejected by Docker. Use the same names
    # as docker-compose.yaml for consistency.
    container_name: notebookContainer

  pipelines: # Snakemake pipelines service
    build:
      context: .
      dockerfile: Dockerfile_SnakemakePipeline
    volumes:
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/data:/MainData
      - /home/ubuntu/MSProject/multiple_sclerosis_proj/notebook/resultFiles:/Output
    ports:
      - 80:5000
    container_name: pipelineContainer
5 changes: 2 additions & 3 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
version: "3"
services:

notebook: # Notebook
build:
dockerfile: Dockerfile_jupyterNotebook
Expand All @@ -12,7 +11,7 @@ services:
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/data:/home/jovyan/MainData
ports:
- 8888:8888
container_name: notebook-container:v1.0.0
container_name: notebookContainer

pipelines: # Pipelines
build:
Expand All @@ -22,4 +21,4 @@ services:
- /Users/junheeyun/OpenKBC/multiple_sclerosis_proj/notebook/resultFiles:/Output
ports:
- 80:5000
container_name: pipeline-container:v1.0.0
container_name: pipelineContainer
62 changes: 1 addition & 61 deletions notebook/notebook_archive/Jun09262021/SVM_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=t)\n",
" X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=t)\n",
"\n",
" randomState = list(range(0,5))\n",
" #randomState = list(range(0,5))\n",
"\n",
" clf = SVC(kernel=\"linear\")\n",
" clf.fit(X_train, y_train)\n",
Expand Down Expand Up @@ -395,66 +395,6 @@
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 129,
"source": [
"X_test.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(17, 200)"
]
},
"metadata": {},
"execution_count": 129
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 130,
"source": [
"X_val.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(18, 200)"
]
},
"metadata": {},
"execution_count": 130
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 131,
"source": [
"X_train.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(51, 200)"
]
},
"metadata": {},
"execution_count": 131
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
Loading

0 comments on commit 4c6f8c1

Please sign in to comment.