You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

storage_initializer_injector.go 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. /*
  2. Copyright 2021 The KubeEdge Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package globalmanager
  14. import (
  15. "net/url"
  16. "path/filepath"
  17. "strings"
  18. v1 "k8s.io/api/core/v1"
  19. "k8s.io/apimachinery/pkg/api/resource"
  20. "k8s.io/klog/v2"
  21. )
  // Settings of the init container that downloads remote content before the
  // worker containers start.
  const (
  	// downloadInitalizerContainerName is the name of the injected init container.
  	downloadInitalizerContainerName = "storage-initializer"
  	// downloadInitalizerImage is the image the init container runs.
  	downloadInitalizerImage = "kubeedge/sedna-storage-initializer:v0.2.0"
  	// downloadInitalizerPrefix is where the shared download volume is mounted;
  	// container paths of downloaded content are built under it.
  	downloadInitalizerPrefix = "/downloads"
  	// downloadInitalizerVolumeName is the name of the shared download volume.
  	downloadInitalizerVolumeName = "sedna-storage-initializer"
  )

  // Settings for host paths mounted into the containers.
  const (
  	// hostPathPrefixEnvKey is the env name that carries hostPathPrefix.
  	hostPathPrefixEnvKey = "DATA_PATH_PREFIX"
  	// hostPathPrefix is the in-container prefix under which host paths appear.
  	hostPathPrefix = "/home/data"
  )

  // Separators and markers used when passing urls around.
  const (
  	// urlsFieldSep joins several paths into one environment value.
  	urlsFieldSep = ";"
  	// indirectURLMark is prepended to the destination dir of an indirect url.
  	indirectURLMark = "@"
  	// indirectURLMarkEnv is the env name that carries indirectURLMark.
  	indirectURLMarkEnv = "INDIRECT_URL_MARK"
  )
  // supportStorageInitializerURLSchemes lists the url schemes whose content is
  // fetched by the storage initializer.
  var supportStorageInitializerURLSchemes = [...]string{
  	"s3",    // s3 compatible storage
  	"http",  // http server, only for downloading
  	"https", // https server, only for downloading
  }
  // supportURLSchemes lists every url scheme accepted for a mount url.
  var supportURLSchemes = [...]string{
  	"s3",    // s3 compatible storage
  	"http",  // http server, only for downloading
  	"https", // https server, only for downloading
  	"",      // host path of a node, for compatibility only, e.g. "/opt/data/model.pb"
  	"file",  // the local path of the worker container
  }
  // workerMountMode names how a worker uses a mounted url.
  type workerMountMode string

  // Supported mount modes. There is no read-write mode for a mounted
  // url/directory yet.
  const (
  	workerMountReadOnly  = workerMountMode("readonly")
  	workerMountWriteOnly = workerMountMode("writeonly")
  )
  56. type MountURL struct {
  57. // URL is the url of dataset/model
  58. URL string
  59. // Indirect indicates the url is indirect, need to parse its content and download all,
  60. // and is used in dataset which has index url.
  61. //
  62. // when Indirect = true, URL could be in host path filesystem.
  63. // default: false
  64. Indirect bool
  65. // Mode indicates the url mode, default is workerMountReadOnly
  66. Mode workerMountMode
  67. // IsDir indicates that url is directory
  68. IsDir bool
  69. // if true, only mounts when url is hostpath
  70. EnableIfHostPath bool
  71. // the container path
  72. ContainerPath string
  73. // indicates the path this url will be mounted into container.
  74. // can be ContainerPath or its parent dir
  75. MountPath string
  76. // for host path, we just need to mount without downloading
  77. HostPath string
  78. // for download
  79. DownloadSrcURL string
  80. DownloadDstDir string
  81. // if true, then no mount
  82. Disable bool
  83. // the relevant secret
  84. Secret *v1.Secret
  85. SecretEnvs []v1.EnvVar
  86. // parsed for the parent of url
  87. u *url.URL
  88. }
  89. func (m *MountURL) Parse() {
  90. u, _ := url.Parse(m.URL)
  91. m.u = u
  92. m.parseDownloadPath()
  93. m.parseHostPath()
  94. m.parseSecret()
  95. }
  96. func (m *MountURL) parseDownloadPath() {
  97. if m.Mode == workerMountWriteOnly {
  98. // no storage-initializer for write only
  99. // leave the write operation to worker
  100. return
  101. }
  102. for _, scheme := range supportStorageInitializerURLSchemes {
  103. if m.u.Scheme == scheme {
  104. m.MountPath = downloadInitalizerPrefix
  105. // here use u.Host + u.Path to avoid conflict
  106. m.ContainerPath = filepath.Join(m.MountPath, m.u.Host+m.u.Path)
  107. m.DownloadSrcURL = m.URL
  108. m.DownloadDstDir, _ = filepath.Split(m.ContainerPath)
  109. break
  110. }
  111. }
  112. }
  113. func (m *MountURL) parseHostPath() {
  114. // for compatibility, hostpath of a node is supported.
  115. // e.g. the url of a dataset: /datasets/d1/label.txt
  116. if m.u.Scheme != "" {
  117. if m.EnableIfHostPath {
  118. // not hostpath, so disable it
  119. m.Disable = true
  120. }
  121. return
  122. }
  123. if m.IsDir {
  124. m.HostPath = m.URL
  125. m.MountPath = filepath.Join(hostPathPrefix, m.u.Path)
  126. m.ContainerPath = m.MountPath
  127. } else {
  128. // if file, here mount its parent directory
  129. m.HostPath, _ = filepath.Split(m.URL)
  130. m.ContainerPath = filepath.Join(hostPathPrefix, m.u.Path)
  131. m.MountPath, _ = filepath.Split(m.ContainerPath)
  132. if m.Indirect {
  133. // we need to download it
  134. // TODO: mv these to download-related section
  135. m.DownloadSrcURL = m.ContainerPath
  136. m.ContainerPath = filepath.Join(downloadInitalizerPrefix, m.u.Host+m.u.Path)
  137. m.DownloadDstDir, _ = filepath.Split(m.ContainerPath)
  138. }
  139. }
  140. }
  141. func (m *MountURL) parseSecret() {
  142. if m.Secret == nil {
  143. return
  144. }
  145. if strings.ToLower(m.u.Scheme) == "s3" || m.Indirect {
  146. SecretEnvs, err := buildS3SecretEnvs(m.Secret)
  147. if err == nil {
  148. m.SecretEnvs = SecretEnvs
  149. }
  150. }
  151. }
  152. func injectHostPathMount(pod *v1.Pod, workerParam *WorkerParam) {
  153. var volumes []v1.Volume
  154. var volumeMounts []v1.VolumeMount
  155. var initContainerVolumeMounts []v1.VolumeMount
  156. uniqVolumeName := make(map[string]bool)
  157. hostPathType := v1.HostPathDirectory
  158. for _, mount := range workerParam.mounts {
  159. for _, m := range mount.URLs {
  160. if m.HostPath == "" {
  161. continue
  162. }
  163. volumeName := ConvertK8SValidName(m.HostPath)
  164. if volumeName == "" {
  165. klog.Warningf("failed to convert volume name from the url and skipped: %s", m.URL)
  166. continue
  167. }
  168. if _, ok := uniqVolumeName[volumeName]; !ok {
  169. volumes = append(volumes, v1.Volume{
  170. Name: volumeName,
  171. VolumeSource: v1.VolumeSource{
  172. HostPath: &v1.HostPathVolumeSource{
  173. Path: m.HostPath,
  174. Type: &hostPathType,
  175. },
  176. },
  177. })
  178. uniqVolumeName[volumeName] = true
  179. }
  180. vm := v1.VolumeMount{
  181. MountPath: m.MountPath,
  182. Name: volumeName,
  183. }
  184. if m.Indirect {
  185. initContainerVolumeMounts = append(initContainerVolumeMounts, vm)
  186. } else {
  187. volumeMounts = append(volumeMounts, vm)
  188. }
  189. }
  190. }
  191. injectVolume(pod, volumes, volumeMounts)
  192. if len(volumeMounts) > 0 {
  193. hostPathEnvs := []v1.EnvVar{
  194. {
  195. Name: hostPathPrefixEnvKey,
  196. Value: hostPathPrefix,
  197. },
  198. }
  199. injectEnvs(pod, hostPathEnvs)
  200. }
  201. if len(initContainerVolumeMounts) > 0 {
  202. initIdx := len(pod.Spec.InitContainers) - 1
  203. pod.Spec.InitContainers[initIdx].VolumeMounts = append(
  204. pod.Spec.InitContainers[initIdx].VolumeMounts,
  205. initContainerVolumeMounts...,
  206. )
  207. }
  208. }
  209. func injectWorkerSecrets(pod *v1.Pod, workerParam *WorkerParam) {
  210. var secretEnvs []v1.EnvVar
  211. for _, mount := range workerParam.mounts {
  212. for _, m := range mount.URLs {
  213. if m.Disable || m.Mode != workerMountWriteOnly {
  214. continue
  215. }
  216. if len(m.SecretEnvs) > 0 {
  217. secretEnvs = MergeSecretEnvs(secretEnvs, m.SecretEnvs, false)
  218. }
  219. }
  220. }
  221. injectEnvs(pod, secretEnvs)
  222. }
  223. func injectInitializerContainer(pod *v1.Pod, workerParam *WorkerParam) {
  224. var volumes []v1.Volume
  225. var volumeMounts []v1.VolumeMount
  226. var downloadPairs []string
  227. var secretEnvs []v1.EnvVar
  228. for _, mount := range workerParam.mounts {
  229. for _, m := range mount.URLs {
  230. if m.Disable {
  231. continue
  232. }
  233. srcURL := m.DownloadSrcURL
  234. dstDir := m.DownloadDstDir
  235. if srcURL != "" && dstDir != "" {
  236. // need to add srcURL first: srcURL dstDir
  237. if m.Indirect {
  238. // here add indirectURLMark into dstDir which is controllable
  239. dstDir = indirectURLMark + dstDir
  240. }
  241. downloadPairs = append(downloadPairs, srcURL, dstDir)
  242. if len(m.SecretEnvs) > 0 {
  243. secretEnvs = MergeSecretEnvs(secretEnvs, m.SecretEnvs, false)
  244. }
  245. }
  246. }
  247. }
  248. // no need to download
  249. if len(downloadPairs) == 0 {
  250. return
  251. }
  252. envs := secretEnvs
  253. envs = append(envs, v1.EnvVar{
  254. Name: indirectURLMarkEnv,
  255. Value: indirectURLMark,
  256. })
  257. // use one empty directory
  258. storageVolume := v1.Volume{
  259. Name: downloadInitalizerVolumeName,
  260. VolumeSource: v1.VolumeSource{
  261. EmptyDir: &v1.EmptyDirVolumeSource{},
  262. },
  263. }
  264. storageVolumeMounts := v1.VolumeMount{
  265. Name: storageVolume.Name,
  266. MountPath: downloadInitalizerPrefix,
  267. ReadOnly: true,
  268. }
  269. volumes = append(volumes, storageVolume)
  270. volumeMounts = append(volumeMounts, storageVolumeMounts)
  271. initVolumeMounts := []v1.VolumeMount{
  272. {
  273. Name: storageVolume.Name,
  274. MountPath: downloadInitalizerPrefix,
  275. ReadOnly: false,
  276. },
  277. }
  278. initContainer := v1.Container{
  279. Name: downloadInitalizerContainerName,
  280. Image: downloadInitalizerImage,
  281. ImagePullPolicy: v1.PullIfNotPresent,
  282. Args: downloadPairs,
  283. TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
  284. Resources: v1.ResourceRequirements{
  285. Limits: map[v1.ResourceName]resource.Quantity{
  286. // limit one cpu
  287. v1.ResourceCPU: resource.MustParse("1"),
  288. // limit 1Gi memory
  289. v1.ResourceMemory: resource.MustParse("1Gi"),
  290. },
  291. },
  292. VolumeMounts: initVolumeMounts,
  293. Env: envs,
  294. }
  295. pod.Spec.InitContainers = append(pod.Spec.InitContainers, initContainer)
  296. injectVolume(pod, volumes, volumeMounts)
  297. }
  298. // InjectStorageInitializer injects these storage related volumes and envs into pod in-place
  299. func InjectStorageInitializer(pod *v1.Pod, workerParam *WorkerParam) {
  300. var mounts []WorkerMount
  301. // parse the mounts and environment key
  302. for _, mount := range workerParam.mounts {
  303. var envPaths []string
  304. if mount.URL != nil {
  305. mount.URLs = append(mount.URLs, *mount.URL)
  306. }
  307. var mountURLs []MountURL
  308. for _, m := range mount.URLs {
  309. m.Parse()
  310. if m.Disable {
  311. continue
  312. }
  313. mountURLs = append(mountURLs, m)
  314. if m.ContainerPath != "" {
  315. envPaths = append(envPaths, m.ContainerPath)
  316. } else {
  317. // keep the original URL if no container path
  318. envPaths = append(envPaths, m.URL)
  319. }
  320. }
  321. if len(mountURLs) > 0 {
  322. mount.URLs = mountURLs
  323. mounts = append(mounts, mount)
  324. }
  325. if mount.EnvName != "" {
  326. workerParam.env[mount.EnvName] = strings.Join(
  327. envPaths, urlsFieldSep,
  328. )
  329. }
  330. }
  331. workerParam.mounts = mounts
  332. // need to call injectInitializerContainer before injectHostPathMount
  333. // since injectHostPathMount could inject volumeMount to init container
  334. injectInitializerContainer(pod, workerParam)
  335. injectHostPathMount(pod, workerParam)
  336. injectWorkerSecrets(pod, workerParam)
  337. }
  338. func injectVolume(pod *v1.Pod, volumes []v1.Volume, volumeMounts []v1.VolumeMount) {
  339. if len(volumes) > 0 {
  340. pod.Spec.Volumes = append(pod.Spec.Volumes, volumes...)
  341. }
  342. if len(volumeMounts) > 0 {
  343. for idx := range pod.Spec.Containers {
  344. // inject every containers
  345. pod.Spec.Containers[idx].VolumeMounts = append(
  346. pod.Spec.Containers[idx].VolumeMounts, volumeMounts...,
  347. )
  348. }
  349. }
  350. }
  351. func injectEnvs(pod *v1.Pod, envs []v1.EnvVar) {
  352. if len(envs) > 0 {
  353. for idx := range pod.Spec.Containers {
  354. // inject every containers
  355. pod.Spec.Containers[idx].Env = append(
  356. pod.Spec.Containers[idx].Env, envs...,
  357. )
  358. }
  359. }
  360. }