You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

README.md 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. # Dog-Croissants-classification Demo
  2. ## Prepare Model
  3. auto-download
  4. ## Prepare for inference worker
  5. ```shell
  6. mkdir -p /incremental_learning/infer/
  7. mkdir -p /incremental_learning/he/
  8. mkdir -p /data/dog_croissants/
  9. mkdir /output
  10. ```
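  11. TODO: download the dataset. The final dataset location is not yet decided; note that the Dataset resource created below expects `/data/dog_croissants/train_data.txt`.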
  12. ```shell
  13. ```
  14. Download the checkpoint
  15. ```shell
  16. # the ckpt file is needed under both directories
  17. mkdir -p /models/base_model
  18. mkdir -p /models/deploy_model
  19. cd /models/base_model
  20. curl https://download.mindspore.cn/vision/classification/mobilenet_v2_1.0_224.ckpt -o base_model.ckpt
  21. cd ../deploy_model
  22. curl https://download.mindspore.cn/vision/classification/mobilenet_v2_1.0_224.ckpt -o deploy_model.ckpt
  23. ```
  24. ## Build Docker Image
  25. ```shell
  26. $ docker build -f incremental-learning-dog-croissants-classification.Dockerfile -t test/dog:v0.1 .
  27. ```
  28. ## Create Incremental Job
  29. ```shell
  30. WORKER_NODE="edge-node"
  31. ```
  32. Create Dataset
  33. ```shell
  34. kubectl create -f - <<EOF
  35. apiVersion: sedna.io/v1alpha1
  36. kind: Dataset
  37. metadata:
  38. name: incremental-dataset
  39. spec:
  40. url: "/data/dog_croissants/train_data.txt"
  41. format: "txt"
  42. nodeName: $WORKER_NODE
  43. EOF
  44. ```
  45. Create the initial Model to simulate the initial model in the incremental learning scenario
  46. ```shell
  47. kubectl create -f - <<EOF
  48. apiVersion: sedna.io/v1alpha1
  49. kind: Model
  50. metadata:
  51. name: initial-model
  52. spec:
  53. url : "/models/base_model/base_model.ckpt"
  54. format: "ckpt"
  55. EOF
  56. ```
  57. Create Deploy Model
  58. ```shell
  59. kubectl create -f - <<EOF
  60. apiVersion: sedna.io/v1alpha1
  61. kind: Model
  62. metadata:
  63. name: deploy-model
  64. spec:
  65. url : "/models/deploy_model/deploy_model.ckpt"
  66. format: "ckpt"
  67. EOF
  68. ```
  69. Create the Job
  70. ```shell
  71. IMAGE=lj1ang/dog:v0.40
  72. kubectl create -f - <<EOF
  73. apiVersion: sedna.io/v1alpha1
  74. kind: IncrementalLearningJob
  75. metadata:
  76. name: dog-croissants-classification-demo
  77. spec:
  78. initialModel:
  79. name: "initial-model"
  80. dataset:
  81. name: "incremental-dataset"
  82. trainProb: 0.8
  83. trainSpec:
  84. template:
  85. spec:
  86. nodeName: $WORKER_NODE
  87. containers:
  88. - image: $IMAGE
  89. name: train-worker
  90. imagePullPolicy: IfNotPresent
  91. args: [ "train.py" ]
  92. env:
  93. - name: "batch_size"
  94. value: "2"
  95. - name: "epochs"
  96. value: "2"
  97. - name: "input_shape"
  98. value: "224"
  99. - name: "class_names"
  100. value: "Croissants, Dog"
  101. - name: "num_parallel_workers"
  102. value: "2"
  103. trigger:
  104. checkPeriodSeconds: 60
  105. timer:
  106. start: 02:00
  107. end: 20:00
  108. condition:
  109. operator: ">"
  110. threshold: 50
  111. metric: num_of_samples
  112. evalSpec:
  113. template:
  114. spec:
  115. nodeName: $WORKER_NODE
  116. containers:
  117. - image: $IMAGE
  118. name: eval-worker
  119. imagePullPolicy: IfNotPresent
  120. args: [ "eval.py" ]
  121. env:
  122. - name: "input_shape"
  123. value: "224"
  124. - name: "batch_size"
  125. value: "2"
  126. - name: "num_parallel_workers"
  127. value: "2"
  128. - name: "class_names"
  129. value: "Croissants, Dog"
  130. deploySpec:
  131. model:
  132. name: "deploy-model"
  133. hotUpdateEnabled: true
  134. pollPeriodSeconds: 60
  135. trigger:
  136. condition:
  137. operator: ">"
  138. threshold: 0.1
  139. metric: precision_delta
  140. hardExampleMining:
  141. name: "Random"
  142. parameters:
  143. - key: "random_ratio"
  144. value: "0.3"
  145. template:
  146. spec:
  147. nodeName: $WORKER_NODE
  148. containers:
  149. - image: $IMAGE
  150. name: infer-worker
  151. imagePullPolicy: IfNotPresent
  152. args: [ "inference.py" ]
  153. env:
  154. - name: "input_shape"
  155. value: "224"
  156. - name: "infer_url"
  157. value: "/infer"
  158. - name: "HE_SAVED_URL"
  159. value: "/he_saved_url"
  160. volumeMounts:
  161. - name: localinferdir
  162. mountPath: /infer
  163. - name: hedir
  164. mountPath: /he_saved_url
  165. resources: # user defined resources
  166. limits:
  167. memory: 3Gi
  168. volumes: # user defined volumes
  169. - name: localinferdir
  170. hostPath:
  171. path: /incremental_learning/infer/
  172. type: DirectoryOrCreate
  173. - name: hedir
  174. hostPath:
  175. path: /incremental_learning/he/
  176. type: DirectoryOrCreate
  177. outputDir: "/output"
  178. EOF
  179. ```
  180. ## Trigger
  181. ```shell
  182. cd /data/helmet_detection
  183. wget https://kubeedge.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz
  184. tar -zxvf dataset.tar.gz
  185. ```
  186. ## Delete
  187. ```shell
  188. kubectl delete dataset incremental-dataset
  189. kubectl delete model initial-model
  190. kubectl delete model deploy-model
  191. kubectl delete IncrementalLearningJob dog-croissants-classification-demo
  192. ```