Skip to content

Commit

Permalink
rebuild specially for pepper project
Browse files Browse the repository at this point in the history
Signed-off-by: cbh778899 <[email protected]>
  • Loading branch information
cbh778899 committed Sep 25, 2024
1 parent b4c79a9 commit 15b3ac5
Show file tree
Hide file tree
Showing 35 changed files with 1,088 additions and 3,703 deletions.
14 changes: 0 additions & 14 deletions .env
Original file line number Diff line number Diff line change
@@ -1,16 +1,2 @@
APP_PORT=8000
APP_EXPOSE_PORT=8000
ENG_ACCESS_PORT=8080
MODEL_SAVE_PATH=volumes/models
DATABASE_BIND_PATH=./lancedb
INFERENCE_ENG=llamacpp
INFERENCE_ENG_PORT=8080
INFERENCE_ENG_VERSION=server--b1-27d4b7c
NUM_CPU_CORES=8.00
NUM_THREADS_COUNT=8.00
EMBEDDING_ENG=embedding_eng
EMBEDDING_ENG_PORT=8081
NUM_CPU_CORES_EMBEDDING=4.00
NUM_THREAD_COUNTS_EMBEDDING=4.00
LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf
EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
6 changes: 3 additions & 3 deletions .env.production
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ ALLOW_ORIGIN="*"
STATIC_API_KEY_ENABLED=0
ENABLE_PLUGIN=0
ENABLE_HTTPS=0
HTTPS_KEY_PATH="*/privkey.pem"
HTTPS_CERT_PATH="*/cert.pem"
HTTPS_CA_PATH="*/chain.pem"
HTTPS_KEY_PATH="privkey.pem"
HTTPS_CERT_PATH="cert.pem"
HTTPS_CA_PATH="chain.pem"
SYSTEM_INSTRUCTION="*"
LOAD_DEFAULT_DATASET=0
DEFAULT_DATASET_NAME="production_dataset"
Expand Down
67 changes: 8 additions & 59 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,29 +1,12 @@
# project related
PROJECT_NAME:=voyager
CONTAINER_NAME:=voyager:v0.2.0
APP_PORT:=8000
APP_EXPOSE_PORT:=8000
# compose build related

ENV_FILE:=.env

ENG_ACCESS_PORT:=8080
MODEL_SAVE_PATH:=volumes/models
APP_PORT:=8000
DATABASE_BIND_PATH:=./lancedb

INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-27d4b7c
NUM_CPU_CORES:=8.00
NUM_THREADS_COUNT:=8.00

EMBEDDING_ENG:=embedding_eng
EMBEDDING_ENG_PORT:=8081
NUM_CPU_CORES_EMBEDDING:=4.00
NUM_THREAD_COUNTS_EMBEDDING:=4.00
LANGUAGE_MODEL_NAME:=ft-smollm-135M-instruct-on-hf-ultrafeedback-f16.gguf
LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/ft-smollm-135M-instruct-on-hf-ultrafeedback-gguf/resolve/main/ft-smollm-135M-instruct-on-hf-ultrafeedback-f16.gguf
EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL:=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true
REGION:=ap-southeast-2

# build and run this service only
.PHONY: build
Expand All @@ -32,36 +15,17 @@ build:

.PHONY: run
run: build
@docker run --rm -p $(PORT):$(PORT) --name $(PROJECT_NAME) $(CONTAINER_NAME)
@docker run --rm -p $(APP_PORT):$(APP_PORT) --name $(PROJECT_NAME) $(CONTAINER_NAME)

# compose build with llamacpp
.PHONY: env
env:
@echo "APP_PORT=$(APP_PORT)"> $(ENV_FILE)
@echo "APP_EXPOSE_PORT=$(APP_EXPOSE_PORT)">> $(ENV_FILE)
@echo "ENG_ACCESS_PORT=$(ENG_ACCESS_PORT)">> $(ENV_FILE)
@echo "MODEL_SAVE_PATH=$(MODEL_SAVE_PATH)">> $(ENV_FILE)
@echo "DATABASE_BIND_PATH=$(DATABASE_BIND_PATH)">> $(ENV_FILE)
@echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(ENV_FILE)
@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(ENV_FILE)
@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(ENV_FILE)
@echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(ENV_FILE)
@echo "NUM_THREADS_COUNT=$(NUM_THREADS_COUNT)">> $(ENV_FILE)
@echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(ENV_FILE)
@echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(ENV_FILE)
@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
@echo "NUM_THREAD_COUNTS_EMBEDDING=$(NUM_THREAD_COUNTS_EMBEDDING)">> $(ENV_FILE)
@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(ENV_FILE)
@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE)

.PHONY: model-prepare
model-prepare:
@mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(LANGUAGE_MODEL_NAME) $(LANGUAGE_MODEL_URL)
@mkdir -p $(MODEL_SAVE_PATH) && [ -f $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) ] || wget -O $(MODEL_SAVE_PATH)/$(EMBEDDING_MODEL_NAME) $(EMBEDDING_MODEL_URL)
@echo "REGION=$(REGION)">> $(ENV_FILE)

# normal build & up
.PHONY: compose-build
compose-build: env model-prepare
compose-build: env
@docker compose -f docker-compose.yaml build

.PHONY: up
Expand All @@ -70,7 +34,7 @@ up: compose-build

# dev build & up
.PHONY: compose-build-dev
compose-build-dev: env model-prepare
compose-build-dev: env
@docker compose -f docker-compose-dev.yaml build

.PHONY: dev
Expand All @@ -80,19 +44,4 @@ dev: compose-build-dev
# stop
.PHONY: stop
stop:
docker compose stop

#########################################################################################
# testing

.PHONY: pytest
pytest:
@python3 -m pytest -v

#########################################################################################
# setup

.PHONY: setup
setup:
gcc setup/setup.c -o setup/setup
setup/setup
docker compose stop
103 changes: 5 additions & 98 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,104 +1,11 @@
# VOYAGER
The project is OpenAI-like API service of SkywardAI ecosystem.

[![Linter and Builder 🚀](https://github.com/SkywardAI/voyager/actions/workflows/linter-builder-checker.yml/badge.svg)](https://github.com/SkywardAI/voyager/actions/workflows/linter-builder-checker.yml)
[![Release Drafter 🚀](https://github.com/SkywardAI/voyager/actions/workflows/release-drafter.yml/badge.svg)](https://github.com/SkywardAI/voyager/actions/workflows/release-drafter.yml)
[![Release Image 🚀](https://github.com/SkywardAI/voyager/actions/workflows/release-image.yml/badge.svg)](https://github.com/SkywardAI/voyager/actions/workflows/release-image.yml)
Voyager, specially built for the RMIT Race Hub Pepper Robot project

## BUILD & RUN

### Setup and API usage demo video

https://github.com/user-attachments/assets/fa7059c2-309e-486a-a28a-45867613c84b

### CLI 💥
Introducing our new CLI tool!
> Make sure you can normally run `make`, `docker compose`, `gcc`, `sh` in your host machine.
Simply run `make setup` in the root folder to compile & run the CLI tool.

Don't want to configure it? Go directly to the `Save & Build` menu and use the `Build and start the server` option to load the app with default settings.

No gcc compiler? You can choose to compile the file `/setup/setup.c` yourself.

Explore it yourself to find more settings!


### Local Machine
* Please make sure you installed `Node.js` on your local machine.
* This project developed on Node Version `v20.15.0`.

```shell
# Manage package by pnpm
# Install pnpm globally, or change it to your local machine location
npm install -g pnpm

# Install dependencies
pnpm install
# OR
npm install

# RUN
npm run
```

### Container
* Please make sure you have `docker` and `make` installed in your server.
* Docker version for testing is `27.0.3, build 7d4bcd8`.
```shell
# to simply start with all needed containers started, please run
Simply run
```sh
make up
# if you just want to build this project to docker container, please run
make build
# if you want to start only this project in docker, please run
make start
# PLEASE NOTE: make start will automatically run make build first

# to run a container bind to your local machine volume, run
# OR
make dev
# this will do the same thing as `make up` but allows you to make changes and sync with container
```
**NOTE:** `make dev` Requires Node.js environment installed, or at least have `node_modules` specified in `package.json` installed on your server. Please see [Local Machine](#local-machine) section.

## Lint
To start lint your code, simply run
```shell
npm run lint
```

## APIs

### Docs
Go to the URL of your project — default [http://localhost:8000](http://localhost:8000) — if you didn't disable the `Docs` route; there you can see the docs and try them out.
See [demo video](#setup-and-api-usage-demo-video).

### Monitor
This project has a monitor built with swagger-stats. Once the project is running, just go to `<Your Server>:<Your Port>/stats`.
For example, [http://localhost:8000/stats](http://localhost:8000/stats)

### Chatbox
> When you set up the project and didn't disable the `chatbox` API, you can get a quick-setup chatbot with some basic styles on your own website, which calls the `/v1/chat/completions` API for inference.
To set it up, simply add
```html
<script src='http://localhost:8000/chatbox' defer></script>
```
into the bottom of your html body element. So easy!

If you want to hide the real link, in your javascript code you can do
```js
const chatbox_script = await (await fetch("http://localhost:8000/chatbox")).blob();
const chatbox_url = URL.createObjectURL(chatbox_script);
const script_elem = document.createElement('script');
script_elem.src = chatbox_url;
document.body.append(script_elem);
```
And remember to use `URL.revokeObjectURL(chatbox_url)` if you don't need it anymore.

Extra parameters ([request query](https://en.wikipedia.org/wiki/Query_string)) you can add to it are:
* `base_url`: `String`
> Add this when in production, otherwise the requests won't send to correct route.
> Default `http://localhost:8000`.
* `max_tokens`: `Integer`
> Add this when you want to limit tokens can be generated, is useful in production.
> Default `128`
To build docker containers
63 changes: 34 additions & 29 deletions actions/inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ function retrieveData(req_header, req_body) {
return { error: true, status: 422, message: "Messages not given!" }
}

// format what AWS Bedrock SDK can recognize
messages = messages.map(({role, content})=>{
return { role, content: [{text: content}] }
})

// apply n_predict value
if(!max_tokens) max_tokens = 128;
request_body.n_predict = max_tokens;
Expand Down Expand Up @@ -148,37 +153,37 @@ export async function chatCompletion(req, res) {
const { api_key, model, system_fingerprint, request_body, messages } = body
const isStream = !!request_body.stream;

if(+process.env.ENABLE_PLUGIN) {
const latest_message = messages.filter(e=>e.role === 'user').pop()
const { type, value } = await userMessageHandler({
"message": latest_message ? latest_message.content : "",
"full_hisoty": messages
});
// if(+process.env.ENABLE_PLUGIN) {
// const latest_message = messages.filter(e=>e.role === 'user').pop()
// const { type, value } = await userMessageHandler({
// "message": latest_message ? latest_message.content : "",
// "full_hisoty": messages
// });

switch(type) {
case "error":
res.status(403).send(value);
return;
case "replace_resp":
res.send(generateResponseContent(
api_key, 'chat.completion', model, system_fingerprint,
isStream, value, true
));
return;
case "history":
request_body.prompt = formatOpenAIContext(value);
break;
case "system_instruction":
messages.push({role:"system", content: value});
break;
case "normal": default:
break;
}
}
// switch(type) {
// case "error":
// res.status(403).send(value);
// return;
// case "replace_resp":
// res.send(generateResponseContent(
// api_key, 'chat.completion', model, system_fingerprint,
// isStream, value, true
// ));
// return;
// case "history":
// request_body.prompt = formatOpenAIContext(value);
// break;
// case "system_instruction":
// messages.push({role:"system", content: value});
// break;
// case "normal": default:
// break;
// }
// }

if(!request_body.prompt) {
request_body.prompt = formatOpenAIContext(messages);
}
// if(!request_body.prompt) {
// request_body.prompt = formatOpenAIContext(messages);
// }

if(isStream) {
res.setHeader("Content-Type", "text/event-stream");
Expand Down
48 changes: 0 additions & 48 deletions docker-compose-adv.yaml

This file was deleted.

Loading

0 comments on commit 15b3ac5

Please sign in to comment.