You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

local-up.sh 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. #!/bin/bash
  2. # Copyright 2021 The KubeEdge Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # Developers can run `hack/local-up.sh` to set up a local environment:
  16. # 1. a local k8s cluster with a master node.
  17. # 2. a kubeedge node.
  18. # 3. our gm/lc.
  19. # Based on the kubeedge-local-up script which builds a local k8s cluster and kubeedge,
  20. # our local-up script installs our package locally for
  21. # simply developing and preparing for e2e tests.
  22. # It does:
  23. # 1. build the gm/lc/worker images.
  24. # 2. download kubeedge source code and run its localup script.
  25. # 3. prepare our k8s env.
  26. # 4. config gm config and start gm.
  27. # 5. start lc.
  28. # 6. add cleanup.
  29. # For cleanup, it needs to do our cleanups before kubeedge cleanup.
  30. # Otherwise lc cleanup (via kubectl delete) is stuck and lc is kept running.
  # Abort on any command failure, on use of an unset variable, and on a
  # failure in any stage of a pipeline.
  set -o errexit -o nounset -o pipefail

  # Physical path of the directory one level above this script; the script
  # runs from there.
  SEDNA_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd -P)"
  cd "$SEDNA_ROOT"

  # Taken from the environment when set and non-empty, otherwise "false".
  : "${NO_CLEANUP:=false}"

  # Repository and tag given to the images built by this script.
  IMAGE_REPO=localhost/kubeedge/sedna
  IMAGE_TAG=localup

  # local k8s cluster name for local-up-kubeedge.sh
  CLUSTER_NAME=sedna
  MASTER_NODENAME="${CLUSTER_NAME}-control-plane"
  EDGE_NODENAME=edge-node

  NAMESPACE=sedna

  # Working directory of this script, resolved relative to $SEDNA_ROOT.
  TMP_DIR="$(realpath local-up-tmp)"

  GM_BIND_PORT=9000
  LC_BIND_PORT=9100
  # Print the machine name reported by `uname -m`, spelling x86_64 as amd64;
  # every other value is printed unchanged.
  arch() {
    local machine=$(uname -m)
    if [ "$machine" = x86_64 ]; then
      machine=amd64
    fi
    echo "$machine"
  }
  # Print the tag name of the latest release of a GitHub repository.
  # Arguments: $1 - repository as owner/name, e.g. kubeedge/kubeedge
  # Outputs:   the tag (e.g. v1.0.0) on stdout
  # Returns:   1, with a message on stderr, when no tag could be obtained
  get_latest_version() {
    # get the latest version of specified gh repo
    local repo=${1}
    local tag

    # output of this latest page:
    # ...
    # "tag_name": "v1.0.0",
    # ...
    # -f turns an HTTP error response into a curl failure, so an error
    # document is never parsed as if it were a release.
    tag=$(
      curl -fsSL "https://api.github.com/repos/${repo}/releases/latest" |
        awk '/"tag_name":/ { print $2 }' |
        sed 's/[",]//g'
    ) || true

    # An empty result used to be returned silently with status 0.
    if [[ -z "$tag" ]]; then
      echo "failed to get the latest release version of ${repo}" >&2
      return 1
    fi
    echo "$tag"
  }
  # Clone the kubeedge sources into ./kubeedge unless that directory exists.
  # Globals:   KUBEEDGE_VERSION (read, optional) - tag or branch to clone;
  #            when unset or empty the latest release tag is looked up
  # Returns:   non-zero when no version can be determined or the clone fails
  download_and_extract_kubeedge() {
    # nothing to do when a checkout is already present
    if [[ -d kubeedge ]]; then
      return 0
    fi

    local version=${KUBEEDGE_VERSION:-}
    if [[ -z "$version" ]]; then
      # Assigned separately from `local` so a failed lookup is not masked.
      version=$(get_latest_version kubeedge/kubeedge) || return
    fi
    if [[ -z "$version" ]]; then
      echo "cannot determine the kubeedge version to clone" >&2
      return 1
    fi

    # the archive file can't works since local-up-kubeedge.sh depends git tag
    # https://github.com/kubeedge/kubeedge/archive/${version}.tar.gz
    git clone -b "$version" --depth 1 https://github.com/kubeedge/kubeedge
  }
  # Print the pid of each process whose executable name equals the given
  # name and whose `ps` line contains $TMP_DIR.
  # Globals:   TMP_DIR (read)
  # Arguments: $1 - executable name to look for (default: edgecore)
  # Outputs:   one pid per line; nothing when no process matches
  # Returns:   the status of the pipeline; finding no match is not a failure
  get_kubeedge_pid() {
    # ps columns: pid, executable name, full command line.
    # The directory is matched literally inside awk instead of a separate
    # `grep -F`: grep exits 1 when nothing matches, which made the whole
    # function fail under pipefail. It is handed over through the
    # environment so awk does not interpret backslashes in the path.
    ps -e -o pid,comm,args |
      KUBEEDGE_PID_DIR="$TMP_DIR" awk -v bin="${1:-edgecore}" '
        # match executable name and print the pid
        index($0, ENVIRON["KUBEEDGE_PID_DIR"]) && $2 == bin { print $1 }
      '
  }
  # Start kubeedge's hack/local-up-kubeedge.sh in a background session, then
  # wait until the master node container and edgecore are running.
  # Globals:   TMP_DIR, CLUSTER_NAME, MASTER_NODENAME (read)
  #            KUBEEDGE_ROOT_PID, KUBEEDGE_WATCHDOG_PID (written)
  # The script exits through errexit if the background session dies while
  # this function is still waiting.
  localup_kubeedge() {
    pushd "$TMP_DIR" >/dev/null

    download_and_extract_kubeedge

    # without setsid when hits ctrl-c, edgecore/cloudcore will be terminated
    # before cleanup called.
    # but we need cloudcore/edgecore alive to clean our container(mainly lc),
    # so here new a session to run local-up-kubeedge.sh
    setsid bash -c "
      cd kubeedge
      # no use ENABLE_DAEMON=true since it has not-fully-cleanup problem.
      TIMEOUT=90 CLUSTER_NAME=$CLUSTER_NAME ENABLE_DAEMON=false
      #
      # here unset OUT_DIR
      # since local-up-kubeedge.sh needs default coded OUT_DIR
      unset OUT_DIR
      source hack/local-up-kubeedge.sh
    " &
    KUBEEDGE_ROOT_PID=$!

    # Single-quoted: the variables are expanded when cleanup evals this text.
    add_cleanup '
      # for the case sometimes kube-proxy container in local machine
      # not cleanup.
      kubectl delete ds -n kube-system kube-proxy

      echo "found kubeedge pid, kill it: $KUBEEDGE_ROOT_PID"
      # wait up to 60s, sending the kill again every 15s
      for((i=0;i<60;i++)); do
        ((i%15==0)) && kill "$KUBEEDGE_ROOT_PID"
        kill -0 "$KUBEEDGE_ROOT_PID" || break
        echo "waiting for $KUBEEDGE_ROOT_PID to exit"
        sleep 1
      done
      # sometimes cloudcore/edgecore cant be stopped(one kill command
      # local-up-kubeedge.sh is not enough),
      # so to ensure this cleanup we clean it manually.
      for bin in cloudcore edgecore; do
        pid=$(get_kubeedge_pid "$bin")
        if [ -n "$pid" ]; then
          echo "found $bin: $pid, try to kill it"
          # cloudcore/edgecore is started by sudo
          # $pid is left unquoted: it may hold several pids
          sudo kill $pid
          sudo kill $pid
        fi
      done
    '

    # wait ${MASTER_NODENAME} container to be running
    while ! docker ps --filter=name="${MASTER_NODENAME}" | grep -q "${MASTER_NODENAME}"; do
      # errexit when kubeedge-local pid exited
      kill -0 "$KUBEEDGE_ROOT_PID"
      sleep 3
    done

    # wait edgecore
    while [ -z "$(get_kubeedge_pid edgecore)" ]; do
      # errexit when kubeedge-local pid exited
      kill -0 "$KUBEEDGE_ROOT_PID"
      sleep 3
    done

    local parent=$$
    {
      # healthcheck for kubeedge-local pid
      # if it died, we died.
      while true; do
        if ! kill -0 "$KUBEEDGE_ROOT_PID"; then
          kill -INT "$parent"
          break
        fi
        sleep 1
      done
    } &
    KUBEEDGE_WATCHDOG_PID=$!
    # Cleanups run in reverse registration order, so this one runs before the
    # kubeedge shutdown registered above. Without it the healthcheck would
    # send SIGINT to this shell as soon as cleanup kills kubeedge.
    add_cleanup 'kill "$KUBEEDGE_WATCHDOG_PID" 2>/dev/null'

    popd >/dev/null
  }
  # Build the image of each named component with `make <name>image` and
  # record its reference in the global <NAME>_IMAGE (e.g. gm -> GM_IMAGE).
  # Globals:   SEDNA_ROOT, IMAGE_REPO, IMAGE_TAG (read)
  #            <NAME>_IMAGE (written, one per argument)
  # Arguments: component names, e.g. gm lc
  build_component_image() {
    local bin image_var
    for bin in "$@"; do
      echo "building $bin image"
      make -C "${SEDNA_ROOT}" "${bin}image" \
        IMAGE_REPO="$IMAGE_REPO" IMAGE_TAG="$IMAGE_TAG"

      # printf -v assigns to the variable named by image_var without
      # re-parsing the value as shell code the way eval did.
      image_var="${bin^^}_IMAGE"
      printf -v "$image_var" '%s' "${IMAGE_REPO}/${bin}:${IMAGE_TAG}"
    done
    # no clean up for images
  }
  # Build the worker base images and describe them in WORKER_IMAGE_HUB.
  # Globals:   IMAGE_REPO (read)
  #            WORKER_TF1_IMAGE, WORKER_IMAGE_HUB (written)
  # The Dockerfile path and the build context are relative to the current
  # directory.
  build_worker_base_images() {
    echo "building worker base images"

    # build tensorflow1.15 image
    WORKER_TF1_IMAGE="${IMAGE_REPO}/worker-tensorflow:1.15"
    docker build \
      -f build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile \
      -t "${WORKER_TF1_IMAGE}" \
      .

    WORKER_IMAGE_HUB="'tensorflow:1.15': ${WORKER_TF1_IMAGE}"
    # add more base images
  }
  # Load every image listed in $GM_IMAGE (whitespace separated) into the
  # kind cluster named $CLUSTER_NAME.
  # Globals:   GM_IMAGE, CLUSTER_NAME (read)
  load_images_to_master() {
    # Deliberately unquoted: split the list into the positional parameters.
    set -- $GM_IMAGE

    local image
    for image; do
      # just use the docker-image command of kind instead of ctr
      # docker save $image | docker exec -i $MASTER_NODENAME ctr --namespace k8s.io image import -
      kind load --name "$CLUSTER_NAME" docker-image "$image"
    done
  }
  # Point kubectl at the kind cluster and create the resources gm/lc need:
  # the CRDs, the namespace and the rbac objects for gm.
  # Globals:   CLUSTER_NAME, TMP_DIR, NAMESPACE (read)
  #            KUBECONFIG (written and exported)
  prepare_k8s_env() {
    # Quoted throughout so a $TMP_DIR containing spaces works.
    local kubeconfig="$TMP_DIR/kubeconfig"
    kind get kubeconfig --name "$CLUSTER_NAME" > "$kubeconfig"

    # Assigned before exporting so a realpath failure is not masked.
    KUBECONFIG=$(realpath "$kubeconfig")
    export KUBECONFIG

    # prepare our k8s environment
    # create these crds including dataset, model, joint-inference etc.
    kubectl apply -f build/crds/sedna/

    # gm, lc will be created in this namespace
    kubectl create namespace "$NAMESPACE"

    # create the cluster role for gm
    kubectl apply -f build/gm/rbac/

    add_cleanup "
      kubectl delete -f build/crds/sedna/
      kubectl delete namespace $NAMESPACE --timeout=5s
    "
    load_images_to_master
  }
  # Write the gm config, publish it as a ConfigMap, create the gm Service
  # and Deployment, and record the address gm is reachable at.
  # Globals:   TMP_DIR, MASTER_NODENAME, NAMESPACE, GM_IMAGE, GM_BIND_PORT,
  #            LC_BIND_PORT, WORKER_IMAGE_HUB (read; the last is optional)
  #            GM_ADDRESS (written, as <node internal ip>:<node port>)
  start_gm() {
    # config gm and start as pod
    pushd "$TMP_DIR" >/dev/null

    local gm_node_name=${MASTER_NODENAME}
    local gm_ip gm_port

    # prepare gm config
    cat > gmconfig <<EOF
kubeConfig: ""
namespace: ""
imageHub:
  ${WORKER_IMAGE_HUB:-}
websocket:
  port: $GM_BIND_PORT
localController:
  server: http://localhost:$LC_BIND_PORT
EOF

    # Registered before the create so cleanup covers a partial failure.
    add_cleanup "kubectl delete cm config -n $NAMESPACE"

    # create configmap for gm config
    kubectl create -n "$NAMESPACE" configmap config --from-file=gmconfig

    add_cleanup "
      kubectl delete deployment gm -n $NAMESPACE
      kubectl delete service gm -n $NAMESPACE
    "

    # start gm as pod with specified node name
    # TODO: create a k8s service, but kubeedge can't support this.
    kubectl create -f - <<EOF
apiVersion: v1
kind: Service
metadata:
  name: gm
  namespace: $NAMESPACE
spec:
  selector:
    app: gm
  type: NodePort
  ports:
    - protocol: TCP
      port: $GM_BIND_PORT
      targetPort: $GM_BIND_PORT
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gm
  labels:
    app: gm
  namespace: $NAMESPACE
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gm
  template:
    metadata:
      labels:
        app: gm
    spec:
      nodeName: $gm_node_name
      serviceAccountName: sedna
      containers:
        - name: gm
          image: $GM_IMAGE
          command: ["sedna-gm", "--config", "/config/gmconfig", "-v2"]
          resources:
            requests:
              memory: 32Mi
              cpu: 100m
            limits:
              memory: 128Mi
          volumeMounts:
            - name: config
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: config
EOF

    # Assigned separately from `local` so a failing kubectl is not masked
    # and cannot produce a half-empty GM_ADDRESS.
    gm_ip=$(kubectl get node "$gm_node_name" -o jsonpath='{ .status.addresses[?(@.type=="InternalIP")].address }')
    gm_port=$(kubectl -n "$NAMESPACE" get svc gm -ojsonpath='{.spec.ports[0].nodePort}')

    GM_ADDRESS=$gm_ip:$gm_port

    add_debug_info "See GM status: kubectl get deploy -n $NAMESPACE gm"
    popd >/dev/null
  }
  # Create the lc DaemonSet, restricted by its nodeSelector to edge nodes,
  # and register the cleanup that removes it again.
  # Globals:   NAMESPACE, LC_IMAGE, GM_ADDRESS, LC_BIND_PORT (read)
  start_lc() {
    local lc_ds_name=lc

    # Double-quoted: $lc_ds_name and $NAMESPACE are expanded now, while the
    # backslash-escaped parts are evaluated when the cleanup runs.
    add_cleanup "
      # so here give a timeout in case edgecore is exited unexpectedly
      kubectl delete --timeout=5s ds $lc_ds_name -n $NAMESPACE

      # if edgecore exited unexpectedly, we need to clean lc manually
      [ -z \"\$(get_kubeedge_pid edgecore)\" ] && {
        # TODO: find a better way to do this
        echo 'try to stop lc and its pause in edgenode manually'
        docker stop \$(
          docker ps |
          # find lc and its pause container id
          # kubeedge/k8s container name rule:
          #  pod: k8s_${lc_ds_name}_{pod_name}_${NAMESPACE}_{pod_uid}_
          #  pause: k8s_POD_{pod_name}_${NAMESPACE}_{pause_uid}_
          # where pod_name is ${lc_ds_name}-[a-z0-9]{5}
          grep 'k8s_.*_${lc_ds_name}-[a-z0-9]*_${NAMESPACE}_' |
          awk NF=1
        ) 2>/dev/null
      }
    "

    # start lc as daemonset
    kubectl create -f- <<EOF
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: sedna-lc
  name: $lc_ds_name
  namespace: $NAMESPACE
spec:
  selector:
    matchLabels:
      k8s-app: $lc_ds_name
  template:
    metadata:
      labels:
        k8s-app: $lc_ds_name
    spec:
      nodeSelector:
        # only schedule to edge node
        node-role.kubernetes.io/edge: ""
      containers:
        - name: $lc_ds_name
          image: $LC_IMAGE
          env:
            - name: GM_ADDRESS
              value: $GM_ADDRESS
            - name: BIND_PORT
              value: "$LC_BIND_PORT"
            - name: NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: ROOTFS_MOUNT_DIR
              # the value of ROOTFS_MOUNT_DIR is same with the mount path of volume
              value: /rootfs
          resources:
            requests:
              memory: 32Mi
              cpu: 100m
            limits:
              memory: 128Mi
          volumeMounts:
            - name: localcontroller
              mountPath: /rootfs
      volumes:
        - name: localcontroller
          hostPath:
            path: /
      hostNetwork: true
EOF
    add_debug_info "See LC status: kubectl get ds -n $NAMESPACE $lc_ds_name"
  }
  # Registered cleanup commands, oldest first.
  CLEANUP_CMDS=()

  # Append every argument to CLEANUP_CMDS as one command string each.
  add_cleanup() {
    local entry
    for entry in "$@"; do
      CLEANUP_CMDS+=("$entry")
    done
  }
  # Eval the commands in CLEANUP_CMDS, most recently registered first.
  # Does nothing but print a notice when NO_CLEANUP is "true".
  # Globals:   NO_CLEANUP, CLEANUP_CMDS (read)
  # Note:      errexit is switched off while the commands run and is
  #            switched on again afterwards.
  cleanup() {
    case "${NO_CLEANUP}" in
      true)
        echo "No clean up..."
        return
        ;;
    esac

    # a failing cleanup command must not stop the remaining ones
    set +o errexit

    echo "Cleaning up sedna..."

    # reverse call cleanup
    local remaining=${#CLEANUP_CMDS[@]} current
    while (( remaining > 0 )); do
      (( remaining-- ))
      current=${CLEANUP_CMDS[remaining]}
      echo "calling $current:"
      eval "$current"
    done

    set -o errexit
  }
  # Health probe; not implemented yet, so it always succeeds.
  check_healthy() {
    # TODO
    return 0
  }
  # Accumulated hint lines; each entry ends with a newline.
  debug_infos=""

  # Append the arguments, joined by single spaces, as one line.
  add_debug_info() {
    # pin IFS so "$*" joins with a space whatever the caller's IFS is
    local IFS=' '
    debug_infos+="$*"$'\n'
  }
  # Prerequisite check; not implemented yet, so it always succeeds.
  check_prerequisites() {
    # TODO
    return 0
  }
  # Escape sequences, kept in backslash form and interpreted on output.
  NO_COLOR='\033[0m'
  RED='\033[0;31m'
  GREEN='\033[0;32m'

  # Print the arguments (joined by spaces) in green, without a newline.
  green_text() {
    local IFS=' '
    printf '%b' "${GREEN}$*${NO_COLOR}"
  }

  # Print the arguments (joined by spaces) in red, without a newline.
  red_text() {
    local IFS=' '
    printf '%b' "${RED}$*${NO_COLOR}"
  }
  # Append <GOPATH>/bin to PATH. GOPATH is taken from the environment when
  # set and non-empty, otherwise from `go env GOPATH`.
  fix_path() {
    # since we depends some tools in $GOPATH/bin,
    # fix the case the user don't add $GOPATH/bin to PATH.
    local gopath=${GOPATH:-$(go env GOPATH)}
    export PATH="${PATH}:${gopath}/bin"
  }
  # Bring the whole local environment up: build the images, start kubeedge,
  # prepare k8s, then start gm and lc.
  do_up() {
    cleanup

    mkdir -p "$TMP_DIR"
    add_cleanup 'rm -rf "$TMP_DIR"'

    fix_path

    build_component_image gm lc
    # on github ci action, sometimes kind-load reported the error that gm/lc
    # image not present locally, here for debug.
    # TODO: remove these docker-images
    docker images
    build_worker_base_images
    docker images

    # the remaining steps take no arguments and run strictly in this order
    local step
    for step in check_prerequisites localup_kubeedge prepare_k8s_env \
      start_gm start_lc; do
      "$step"
    done
  }
  # Bring the environment up and stay in the foreground until
  # check_healthy fails; cleanup is run when the shell exits.
  do_up_fg() {
    trap cleanup EXIT

    do_up

    cat <<EOF
Local Sedna cluster is $(green_text running).
Currently local-up script only support foreground running.
Press $(red_text Ctrl-C) to shut it down!
You can use it with: kind export kubeconfig --name ${CLUSTER_NAME}
$debug_infos

EOF

    # poll every 5 seconds for as long as the health check passes
    while true; do
      check_healthy || break
      sleep 5
    done
  }
  # Entry point: bring the environment up only when __WITH_SOURCE__ is set
  # and non-empty, otherwise bring it up and stay in the foreground.
  main() {
    if [[ -n "${__WITH_SOURCE__:-}" ]]; then
      # __WITH_SOURCE__ mode, for run-e2e.sh
      do_up
    else
      do_up_fg
    fi
  }

  main