@@ -0,0 +1,451 @@
#!/bin/bash

# Developers can run `hack/local-up.sh` to set up a local environment:
# 1. a local k8s cluster with a master node.
# 2. a kubeedge node.
# 3. our gm/lc.

# Based on the kubeedge local-up script, which builds a local k8s cluster
# and kubeedge, our local-up script installs our package locally
# for development and for preparing e2e tests.

# It does:
# 1. build the gm/lc/worker images.
# 2. download the kubeedge source code and run its local-up script.
# 3. prepare our k8s env.
# 4. configure and start gm.
# 5. start lc.
# 6. add cleanup.

# For cleanup, our cleanups must run before the kubeedge cleanup.
# Otherwise lc cleanup (via kubectl delete) gets stuck and lc keeps running.
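
# Typical usage from the repo root:
#   bash hack/local-up.sh
# Set NO_CLEANUP=true (defined below) to skip cleanup on exit and keep the
# environment around for inspection.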

set -o errexit
set -o nounset
set -o pipefail

NEPTUNE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd -P)"
cd "$NEPTUNE_ROOT"

NO_CLEANUP=${NO_CLEANUP:-false}

IMAGE_REPO=localhost/edgeai-neptune/neptune
IMAGE_TAG=localup

# local k8s cluster name for local-up-kubeedge.sh
CLUSTER_NAME=neptune
MASTER_NODENAME=${CLUSTER_NAME}-control-plane
EDGE_NODENAME=edge-node
NAMESPACE=neptune

KUBEEDGE_VERSION=master
TMP_DIR="$(realpath local-up-tmp)"

GM_BIND_PORT=9000
LC_BIND_PORT=9100

arch() {
    local arch=$(uname -m)
    case "$arch" in
        x86_64) arch=amd64;;
        *);;
    esac
    echo "$arch"
}
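
# e.g. on an x86_64 host `arch` echoes "amd64"; other machine types
# (e.g. aarch64) are echoed unchanged.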

download_and_extract_kubeedge() {
    [ -d kubeedge ] && return

    local version=${1:-$KUBEEDGE_VERSION}

    # the master branch doesn't work with git clone --depth 1
    git clone -b $version https://github.com/kubeedge/kubeedge
    return

    # the archive file doesn't work since local-up-kubeedge.sh depends on git tags:
    # https://github.com/kubeedge/kubeedge/archive/${version}.tar.gz
}

get_kubeedge_pid() {
    ps -e -o pid,comm,args |
        grep -F "$TMP_DIR" |
        # match the executable name and print the pid
        awk -v bin="${1:-edgecore}" 'NF=$2==bin'
}
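
# In the awk filter above, `NF=$2==bin` assigns 1 to NF when the command name
# matches (truncating the record to its first field, the pid) and 0 otherwise;
# the assignment value doubles as the pattern, so only matching pids print.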

localup_kubeedge() {
    pushd $TMP_DIR >/dev/null
    download_and_extract_kubeedge
    # Without setsid, hitting Ctrl-C would terminate edgecore/cloudcore
    # before our cleanup gets called.
    # But we need cloudcore/edgecore alive to clean up our containers (mainly lc),
    # so run local-up-kubeedge.sh in a new session.
    setsid bash -c "
        cd kubeedge

        # don't use ENABLE_DAEMON=true since it has an incomplete-cleanup problem.
        TIMEOUT=90 CLUSTER_NAME=$CLUSTER_NAME ENABLE_DAEMON=false
        source hack/local-up-kubeedge.sh
    " &
    KUBEEDGE_ROOT_PID=$!
    add_cleanup '
        # for the case where the kube-proxy container on the local machine
        # is not cleaned up.
        kubectl delete ds -n kube-system kube-proxy

        echo "found kubeedge pid, kill it: $KUBEEDGE_ROOT_PID"
        for((i=0;i<60;i++)); do
            ((i%15==0)) && kill "$KUBEEDGE_ROOT_PID"
            kill -0 "$KUBEEDGE_ROOT_PID" || break
            echo "waiting for $KUBEEDGE_ROOT_PID to exit"
            sleep 1
        done
        # Sometimes cloudcore/edgecore cannot be stopped (the one kill command
        # in local-up-kubeedge.sh is not enough),
        # so to ensure this cleanup we kill them manually.
        for bin in cloudcore edgecore; do
            pid=$(get_kubeedge_pid $bin)
            if [ -n "$pid" ]; then
                echo "found $bin: $pid, kill it"
                kill $pid
            fi
        done
    '

    # wait for the ${MASTER_NODENAME} container to be running
    while ! docker ps --filter=name=${MASTER_NODENAME} | grep -q ${MASTER_NODENAME}; do
        # errexit if the kubeedge local-up process has exited
        kill -0 "$KUBEEDGE_ROOT_PID"
        sleep 3
    done

    # wait for edgecore to come up
    while [ -z "$(get_kubeedge_pid edgecore)" ]; do
        # errexit if the kubeedge local-up process has exited
        kill -0 "$KUBEEDGE_ROOT_PID"
        sleep 3
    done

    local parent=$$
    {
        # health check for the kubeedge local-up process:
        # if it dies, we die too.
        while true; do
            if ! kill -0 "$KUBEEDGE_ROOT_PID"; then
                kill -INT $parent
                break
            fi
            sleep 1
        done
    }&
    popd
}
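
# After localup_kubeedge returns, cloudcore/edgecore are running from under
# $TMP_DIR; they can be inspected by hand with e.g. `get_kubeedge_pid cloudcore`.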

build_component_image() {
    local bin
    for bin; do
        echo "building $bin image"
        make -C "${NEPTUNE_ROOT}" ${bin}image IMAGE_REPO=$IMAGE_REPO IMAGE_TAG=$IMAGE_TAG
        eval ${bin^^}_IMAGE="'${IMAGE_REPO}/${bin}:${IMAGE_TAG}'"
    done
    # no cleanup for images
}
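
# ${bin^^} upper-cases the component name, so `build_component_image gm lc`
# defines GM_IMAGE=localhost/edgeai-neptune/neptune/gm:localup and the
# corresponding LC_IMAGE, which the manifests below consume.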

build_worker_base_images() {
    echo "building worker base images"
    # build the tensorflow-1.15 base image
    WORKER_TF1_IMAGE=$IMAGE_REPO/worker-tensorflow:1.15
    docker build -f build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile -t $WORKER_TF1_IMAGE .

    WORKER_IMAGE_HUB="'tensorflow:1.15': $WORKER_TF1_IMAGE"
    # add more base images here when needed
}
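
# WORKER_IMAGE_HUB holds a YAML mapping entry ('<framework>:<version>': image)
# that is spliced into the imageHub section of the gm config below.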

load_images_to_master() {
    local image
    for image in $GM_IMAGE; do
        # just use kind's docker-image loading instead of ctr:
        # docker save $image | docker exec -i $MASTER_NODENAME ctr --namespace k8s.io image import -
        kind load --name $CLUSTER_NAME docker-image $image
    done
}
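
# kind nodes use their own containerd image store, so locally built docker
# images must be loaded into the cluster explicitly; `kind load docker-image`
# wraps the save/import dance shown in the comment above.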

prepare_k8s_env() {
    kind get kubeconfig --name $CLUSTER_NAME > $TMP_DIR/kubeconfig
    export KUBECONFIG=$(realpath $TMP_DIR/kubeconfig)
    # prepare our k8s environment:
    # create our crds, including dataset, model, joint-inference etc.
    kubectl apply -f build/crds/neptune/

    # gm and lc will be created in this namespace
    kubectl create namespace $NAMESPACE

    # create the cluster role for gm
    kubectl apply -f build/gm/rbac/

    add_cleanup "
        kubectl delete -f build/crds/neptune/
        kubectl delete namespace $NAMESPACE --timeout=5s
    "
    load_images_to_master
}
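
# Sanity check: `kubectl get crds` should now list the neptune CRDs
# (dataset, model, joint-inference, etc.).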

start_gm() {
    # configure gm and start it as a pod

    pushd $TMP_DIR >/dev/null

    local gm_node_name=${MASTER_NODENAME}
    local gm_pod_name=gm-pod

    # prepare the gm config
    cat > gmconfig <<EOF
kubeConfig: ""
namespace: ""
imageHub:
  $WORKER_IMAGE_HUB
websocket:
  port: $GM_BIND_PORT
localController:
  server: http://localhost:$LC_BIND_PORT
EOF

    add_cleanup "kubectl delete cm config -n $NAMESPACE"

    # create a configmap from the gm config
    kubectl create -n $NAMESPACE configmap config --from-file=gmconfig

    add_cleanup "
        kubectl delete deployment gm -n $NAMESPACE
        kubectl delete service gm -n $NAMESPACE
    "

    # start gm as a pod pinned to the specified node.
    # TODO: serve gm via a regular k8s service, which kubeedge can't support yet.
    kubectl create -f - <<EOF
apiVersion: v1
kind: Service
metadata:
  name: gm
  namespace: neptune
spec:
  selector:
    app: gm
  type: NodePort
  ports:
    - protocol: TCP
      port: $GM_BIND_PORT
      targetPort: $GM_BIND_PORT
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gm
  labels:
    app: gm
  namespace: neptune
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gm
  template:
    metadata:
      labels:
        app: gm
    spec:
      nodeName: $gm_node_name
      serviceAccountName: neptune
      containers:
        - name: gm
          image: $GM_IMAGE
          command: ["neptune-gm", "--config", "/config/gmconfig", "-v2"]
          resources:
            requests:
              memory: 32Mi
              cpu: 100m
            limits:
              memory: 128Mi
          volumeMounts:
            - name: config
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: config
EOF

    local gm_ip=$(kubectl get node $gm_node_name -o jsonpath='{ .status.addresses[?(@.type=="InternalIP")].address }')
    local gm_port=$(kubectl -n $NAMESPACE get svc gm -ojsonpath='{.spec.ports[0].nodePort}')

    GM_ADDRESS=$gm_ip:$gm_port

    add_debug_info "See GM status: kubectl get deploy -n $NAMESPACE gm"
    popd
}
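
# GM_ADDRESS is <node internal ip>:<allocated NodePort>; lc on the edge node
# reaches gm directly at this address rather than through a service name
# (see the TODO above).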

start_lc() {
    local lc_ds_name=lc

    add_cleanup "
        # give a timeout in case edgecore has exited unexpectedly
        kubectl delete --timeout=5s ds lc -n neptune

        # if edgecore exited unexpectedly, we need to clean up lc manually
        [ -z \"\$(get_kubeedge_pid edgecore)\" ] && {
            # TODO: find a better way to do this
            echo 'try to stop lc and its pause container on the edge node manually'
            docker stop \$(
                docker ps |
                # find the lc and pause container ids.
                # kubeedge/k8s container name rule:
                #   pod:   k8s_${lc_ds_name}_{pod_name}_${NAMESPACE}_{pod_uid}_
                #   pause: k8s_POD_{pod_name}_${NAMESPACE}_{pause_uid}_
                # where pod_name is ${lc_ds_name}-[a-z0-9]{5}
                grep 'k8s_.*_${lc_ds_name}-[a-z0-9]*_${NAMESPACE}_' |
                awk NF=1
            ) 2>/dev/null
        }
    "

    # start lc as a daemonset
    kubectl create -f- <<EOF
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: neptune-lc
  name: $lc_ds_name
  namespace: $NAMESPACE
spec:
  selector:
    matchLabels:
      k8s-app: $lc_ds_name
  template:
    metadata:
      labels:
        k8s-app: $lc_ds_name
    spec:
      nodeSelector:
        # only schedule to the edge node
        node-role.kubernetes.io/edge: ""
      containers:
        - name: $lc_ds_name
          image: $LC_IMAGE
          env:
            - name: GM_ADDRESS
              value: $GM_ADDRESS
            - name: BIND_PORT
              value: "$LC_BIND_PORT"
            - name: NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: ROOTFS_MOUNT_DIR
              # the value of ROOTFS_MOUNT_DIR is the same as the volume mount path
              value: /rootfs
          resources:
            requests:
              memory: 32Mi
              cpu: 100m
            limits:
              memory: 128Mi
          volumeMounts:
            - name: localcontroller
              mountPath: /rootfs
      volumes:
        - name: localcontroller
          hostPath:
            path: /
      hostNetwork: true
EOF
    add_debug_info "See LC status: kubectl get ds -n $NAMESPACE $lc_ds_name"
}
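
# Note on the cleanup above: `awk NF=1` truncates each matching `docker ps`
# line to its first column, the container id that `docker stop` expects.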

declare -a CLEANUP_CMDS=()
add_cleanup() {
    CLEANUP_CMDS+=("$@")
}

cleanup() {
    if [[ "${NO_CLEANUP}" = true ]]; then
        echo "No cleanup..."
        return
    fi

    set +o errexit

    echo "Cleaning up neptune..."

    local idx=${#CLEANUP_CMDS[@]} cmd
    # call the cleanup commands in reverse order
    for((;--idx>=0;)); do
        cmd=${CLEANUP_CMDS[idx]}
        echo "calling $cmd:"
        eval "$cmd"
    done

    set -o errexit
}
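
# Cleanups run LIFO, mirroring setup order. For example:
#   add_cleanup 'echo teardown A'   # registered first, runs last
#   add_cleanup 'echo teardown B'   # registered last, runs first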

check_healthy() {
    # TODO
    true
}

debug_infos=""
add_debug_info() {
    debug_infos+="$@
"
}

check_prerequisites() {
    # TODO
    true
}

NO_COLOR='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
green_text() {
    echo -ne "$GREEN$@$NO_COLOR"
}

red_text() {
    echo -ne "$RED$@$NO_COLOR"
}

trap cleanup EXIT

cleanup

mkdir -p "$TMP_DIR"
add_cleanup 'rm -rf "$TMP_DIR"'

build_component_image gm lc
build_worker_base_images

check_prerequisites

localup_kubeedge

prepare_k8s_env

start_gm
start_lc

echo "Local Neptune cluster is $(green_text running).
Currently the local-up script only supports foreground running.
Press $(red_text Ctrl-C) to shut it down!

You can use it with: kind export kubeconfig --name ${CLUSTER_NAME}

$debug_infos
"

while check_healthy; do sleep 5; done