feat: retry upload on jar not found

This commit is contained in:
Mohamad Khani 2024-12-08 01:24:14 +03:30
parent 5abc044d69
commit 91ccfebfeb
3 changed files with 79 additions and 38 deletions

View File

@ -2,6 +2,7 @@ package managed_job
import ( import (
"flink-kube-operator/internal/crd/v1alpha1" "flink-kube-operator/internal/crd/v1alpha1"
"strings"
"time" "time"
"gitea.com/logicamp/lc" "gitea.com/logicamp/lc"
@ -32,23 +33,44 @@ func (job *ManagedJob) Cycle() {
// Init job // Init job
if job.def.Status.JobStatus == "" { if job.def.Status.JobStatus == "" {
err := job.upload() if job.def.Status.LastSavepointPath == nil {
if err != nil { if job.def.Status.JarId == nil {
job.crd.Patch(job.def.UID, map[string]interface{}{ err := job.upload()
"status": map[string]interface{}{ if err != nil {
"error": "[upload-error] " + err.Error(), job.crd.Patch(job.def.UID, map[string]interface{}{
}, "status": map[string]interface{}{
}) "error": "[upload-error] " + err.Error(),
return },
} })
err = job.run() return
if err != nil { }
job.crd.Patch(job.def.UID, map[string]interface{}{ }
"status": map[string]interface{}{ for {
"error": "[run-error] " + err.Error(), err := job.run()
}, if err != nil {
}) if strings.ContainsAny(err.Error(), ".jar does not exist") {
return err := job.upload()
if err != nil {
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"error": "[upload-error] " + err.Error(),
},
})
return
}
continue
}
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"error": "[run-error] " + err.Error(),
},
})
return
}
return
}
} else {
job.restore()
} }
return return
} }

View File

@ -3,6 +3,7 @@ package managed_job
import ( import (
"errors" "errors"
"flink-kube-operator/internal/crd/v1alpha1" "flink-kube-operator/internal/crd/v1alpha1"
"strings"
"time" "time"
"gitea.com/logicamp/lc" "gitea.com/logicamp/lc"
@ -18,36 +19,53 @@ func (job *ManagedJob) restore() error {
} }
if job.def.Status.JarId == nil { if job.def.Status.JarId == nil {
err := errors.New("missing jar id") err := errors.New("missing jar id")
lc.Logger.Error("[managed-job] [run]", zap.Error(err)) lc.Logger.Error("[managed-job] [restore]", zap.Error(err))
return err return err
} }
lc.Logger.Debug("[managed-job] [restore] restoring", zap.String("savepointPath", *job.def.Status.LastSavepointPath)) lc.Logger.Info("[managed-job] [restore] restoring job", zap.String("name", job.def.GetName()), zap.String("savepointPath", *job.def.Status.LastSavepointPath))
runJarResp, err := job.client.RunJar(api.RunOpts{ var jobId *string
JarID: *job.def.Status.JarId, for {
AllowNonRestoredState: true, runJarResp, err := job.client.RunJar(api.RunOpts{
EntryClass: job.def.Spec.EntryClass, JarID: *job.def.Status.JarId,
SavepointPath: *job.def.Status.LastSavepointPath, AllowNonRestoredState: true,
}) EntryClass: job.def.Spec.EntryClass,
if err != nil { SavepointPath: *job.def.Status.LastSavepointPath,
lc.Logger.Error("[managed-job] [run]", zap.Error(err)) })
return err if err != nil {
if strings.ContainsAny(err.Error(), ".jar does not exist") {
err := job.upload()
if err != nil {
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"error": "[upload-error] " + err.Error(),
},
})
return nil
}
continue
}
lc.Logger.Error("[managed-job] [restore]", zap.Error(err))
return err
}
jobId = &runJarResp.JobId
lc.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
break
} }
lc.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
// job.def.Status.JobId = &runJarResp.JobId // job.def.Status.JobId = &runJarResp.JobId
// job.def.Status.JobStatus = v1alpha1.JobStatusCreating // job.def.Status.JobStatus = v1alpha1.JobStatusCreating
// job.def.Status.Error = nil // job.def.Status.Error = nil
job.crd.Patch(job.def.UID, map[string]interface{}{ job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{ "status": map[string]interface{}{
"jobId": &runJarResp.JobId, "jobId": jobId,
"jobStatus": v1alpha1.JobStatusCreating, "jobStatus": v1alpha1.JobStatusCreating,
"lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring, "lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring,
"lastRestoredSavepointDate": job.def.Status.LastRestoredSavepointDate, "lastRestoredSavepointDate": job.def.Status.LastSavepointDate,
"restoredCount": job.def.Status.RestoredCount + 1, "restoredCount": job.def.Status.RestoredCount + 1,
"lastRestoredSavepointRestoreDate": time.Now().Format(time.RFC3339), "lastRestoredSavepointRestoredDate": time.Now().Format(time.RFC3339),
"error": nil, "error": nil,
}, },
}) })
return err return nil
} }

View File

@ -37,10 +37,11 @@ func (job *ManagedJob) upload() error {
// run the job from saved jarId in managedJob // run the job from saved jarId in managedJob
func (job *ManagedJob) run() error { func (job *ManagedJob) run() error {
if job.def.Status.JarId == nil { if job.def.Status.JarId == nil {
err := errors.New("missing jar id") err := errors.New("missing jar id")
lc.Logger.Error("[managed-job] [run]", zap.Error(err)) lc.Logger.Error("[managed-job] [run]", zap.Error(err))
return err return err
} }
lc.Logger.Info("[managed-job] [run] starting job", zap.String("name", job.def.GetName()))
runJarResp, err := job.client.RunJar(api.RunOpts{ runJarResp, err := job.client.RunJar(api.RunOpts{
JarID: *job.def.Status.JarId, JarID: *job.def.Status.JarId,
AllowNonRestoredState: true, AllowNonRestoredState: true,