feat: retry upload on jar not found
This commit is contained in:
parent
5abc044d69
commit
91ccfebfeb
@ -2,6 +2,7 @@ package managed_job
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"flink-kube-operator/internal/crd/v1alpha1"
|
"flink-kube-operator/internal/crd/v1alpha1"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.com/logicamp/lc"
|
"gitea.com/logicamp/lc"
|
||||||
@ -32,23 +33,44 @@ func (job *ManagedJob) Cycle() {
|
|||||||
|
|
||||||
// Init job
|
// Init job
|
||||||
if job.def.Status.JobStatus == "" {
|
if job.def.Status.JobStatus == "" {
|
||||||
err := job.upload()
|
if job.def.Status.LastSavepointPath == nil {
|
||||||
if err != nil {
|
if job.def.Status.JarId == nil {
|
||||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
err := job.upload()
|
||||||
"status": map[string]interface{}{
|
if err != nil {
|
||||||
"error": "[upload-error] " + err.Error(),
|
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||||
},
|
"status": map[string]interface{}{
|
||||||
})
|
"error": "[upload-error] " + err.Error(),
|
||||||
return
|
},
|
||||||
}
|
})
|
||||||
err = job.run()
|
return
|
||||||
if err != nil {
|
}
|
||||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
}
|
||||||
"status": map[string]interface{}{
|
for {
|
||||||
"error": "[run-error] " + err.Error(),
|
err := job.run()
|
||||||
},
|
if err != nil {
|
||||||
})
|
if strings.ContainsAny(err.Error(), ".jar does not exist") {
|
||||||
return
|
err := job.upload()
|
||||||
|
if err != nil {
|
||||||
|
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||||
|
"status": map[string]interface{}{
|
||||||
|
"error": "[upload-error] " + err.Error(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||||
|
"status": map[string]interface{}{
|
||||||
|
"error": "[run-error] " + err.Error(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
job.restore()
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package managed_job
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"flink-kube-operator/internal/crd/v1alpha1"
|
"flink-kube-operator/internal/crd/v1alpha1"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.com/logicamp/lc"
|
"gitea.com/logicamp/lc"
|
||||||
@ -18,36 +19,53 @@ func (job *ManagedJob) restore() error {
|
|||||||
}
|
}
|
||||||
if job.def.Status.JarId == nil {
|
if job.def.Status.JarId == nil {
|
||||||
err := errors.New("missing jar id")
|
err := errors.New("missing jar id")
|
||||||
lc.Logger.Error("[managed-job] [run]", zap.Error(err))
|
lc.Logger.Error("[managed-job] [restore]", zap.Error(err))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
lc.Logger.Debug("[managed-job] [restore] restoring", zap.String("savepointPath", *job.def.Status.LastSavepointPath))
|
lc.Logger.Info("[managed-job] [restore] restoring job", zap.String("name", job.def.GetName()), zap.String("savepointPath", *job.def.Status.LastSavepointPath))
|
||||||
runJarResp, err := job.client.RunJar(api.RunOpts{
|
var jobId *string
|
||||||
JarID: *job.def.Status.JarId,
|
for {
|
||||||
AllowNonRestoredState: true,
|
runJarResp, err := job.client.RunJar(api.RunOpts{
|
||||||
EntryClass: job.def.Spec.EntryClass,
|
JarID: *job.def.Status.JarId,
|
||||||
SavepointPath: *job.def.Status.LastSavepointPath,
|
AllowNonRestoredState: true,
|
||||||
})
|
EntryClass: job.def.Spec.EntryClass,
|
||||||
if err != nil {
|
SavepointPath: *job.def.Status.LastSavepointPath,
|
||||||
lc.Logger.Error("[managed-job] [run]", zap.Error(err))
|
})
|
||||||
return err
|
if err != nil {
|
||||||
|
if strings.ContainsAny(err.Error(), ".jar does not exist") {
|
||||||
|
err := job.upload()
|
||||||
|
if err != nil {
|
||||||
|
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||||
|
"status": map[string]interface{}{
|
||||||
|
"error": "[upload-error] " + err.Error(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
lc.Logger.Error("[managed-job] [restore]", zap.Error(err))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
jobId = &runJarResp.JobId
|
||||||
|
lc.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
|
||||||
|
break
|
||||||
}
|
}
|
||||||
lc.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
|
|
||||||
|
|
||||||
// job.def.Status.JobId = &runJarResp.JobId
|
// job.def.Status.JobId = &runJarResp.JobId
|
||||||
// job.def.Status.JobStatus = v1alpha1.JobStatusCreating
|
// job.def.Status.JobStatus = v1alpha1.JobStatusCreating
|
||||||
// job.def.Status.Error = nil
|
// job.def.Status.Error = nil
|
||||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||||
"status": map[string]interface{}{
|
"status": map[string]interface{}{
|
||||||
"jobId": &runJarResp.JobId,
|
"jobId": jobId,
|
||||||
"jobStatus": v1alpha1.JobStatusCreating,
|
"jobStatus": v1alpha1.JobStatusCreating,
|
||||||
"lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring,
|
"lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring,
|
||||||
"lastRestoredSavepointDate": job.def.Status.LastRestoredSavepointDate,
|
"lastRestoredSavepointDate": job.def.Status.LastSavepointDate,
|
||||||
"restoredCount": job.def.Status.RestoredCount + 1,
|
"restoredCount": job.def.Status.RestoredCount + 1,
|
||||||
"lastRestoredSavepointRestoreDate": time.Now().Format(time.RFC3339),
|
"lastRestoredSavepointRestoredDate": time.Now().Format(time.RFC3339),
|
||||||
"error": nil,
|
"error": nil,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
return err
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -37,10 +37,11 @@ func (job *ManagedJob) upload() error {
|
|||||||
// run starts the job from the jar ID saved in the managed job's status.
|
// run starts the job from the jar ID saved in the managed job's status.
|
||||||
func (job *ManagedJob) run() error {
|
func (job *ManagedJob) run() error {
|
||||||
if job.def.Status.JarId == nil {
|
if job.def.Status.JarId == nil {
|
||||||
err := errors.New("missing jar id")
|
err := errors.New("missing jar id")
|
||||||
lc.Logger.Error("[managed-job] [run]", zap.Error(err))
|
lc.Logger.Error("[managed-job] [run]", zap.Error(err))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
lc.Logger.Info("[managed-job] [run] starting job", zap.String("name", job.def.GetName()))
|
||||||
runJarResp, err := job.client.RunJar(api.RunOpts{
|
runJarResp, err := job.client.RunJar(api.RunOpts{
|
||||||
JarID: *job.def.Status.JarId,
|
JarID: *job.def.Status.JarId,
|
||||||
AllowNonRestoredState: true,
|
AllowNonRestoredState: true,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user