Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions api/scheduling/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ type Options struct {
// committed resource reservation slot. Set for non-VM-placement runs (capacity checks,
// failover scheduling, CR slot scheduling) that must not modify reservation allocations.
SkipCommittedResourceTracking bool `json:"skip_committed_resource_tracking,omitempty"`
// SkipPlacementContextFilters skips filters that are only meaningful for actual VM
// placement triggered by a user request (e.g. instance group affinity). See the
// filters that check this option for the full list.
SkipPlacementContextFilters bool `json:"skip_placement_context_filters,omitempty"`
}

// Validate checks for mutually exclusive or inconsistent option combinations.
Expand Down
13 changes: 7 additions & 6 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ cortex-scheduling-controllers:
# that use committed resources. Requires also enabling of CR controllers and tasks
committedResourceTracking: false
# Pipeline used for the empty-state capacity probe (ignores allocations and reservations).
capacityTotalPipeline: "kvm-report-capacity"
capacityTotalPipeline: "kvm-general-purpose-load-balancing"
# Pipeline used for the current-state capacity probe (considers current VM allocations).
capacityPlaceablePipeline: "kvm-general-purpose-load-balancing-no-history"
capacityPlaceablePipeline: "kvm-general-purpose-load-balancing"
# How often the capacity controller re-runs its scheduler probes.
capacityReconcileInterval: 5m
# If true, the external scheduler API will limit the list of hosts in its
Expand All @@ -163,11 +163,12 @@ cortex-scheduling-controllers:
# Set to 0 or negative to disable shuffling.
evacuationShuffleK: 3
committedResourceReservationController:
# Maps flavor group IDs to pipeline names; "*" acts as catch-all fallback
# Pipeline selection for CR reservation scheduling. The catch-all default covers
# general-purpose flavors. For HANA flavor groups, add an explicit entry, e.g.:
# "my-hana-group": "kvm-hana-bin-packing"
flavorGroupPipelines:
"*": "kvm-general-purpose-load-balancing-no-history" # Catch-all fallback
# Fallback pipeline when no flavorGroupPipelines entry matches
pipelineDefault: "kvm-general-purpose-load-balancing-no-history"
"*": "kvm-general-purpose-load-balancing"
pipelineDefault: "kvm-general-purpose-load-balancing"
# How often to re-verify active Reservation CRDs (healthy state)
requeueIntervalActive: "5m"
# Back-off interval when knowledge is unavailable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ type FilterAggregateMetadata struct {
// the "filter_tenant_id" metadata key set.
func (s *FilterAggregateMetadata) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) {
result := s.IncludeAllHostsFromRequest(request)
if request.GetOptions().SkipPlacementContextFilters {
return result, nil
}

hvs := &hv1.HypervisorList{}
if err := s.Client.List(context.Background(), hvs); err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"

api "github.com/cobaltcore-dev/cortex/api/external/nova"
"github.com/cobaltcore-dev/cortex/api/scheduling"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -404,3 +405,40 @@ func TestFilterAggregateMetadata_IndexRegistration(t *testing.T) {
t.Errorf("expected factory to return *FilterAggregateMetadata, got %T", filter)
}
}

func TestFilterAggregateMetadata_SkipPlacementContextFilters(t *testing.T) {
scheme := runtime.NewScheme()
if err := hv1.AddToScheme(scheme); err != nil {
t.Fatalf("failed to add hv1 to scheme: %v", err)
}
// host1 is in an aggregate restricting to project-x; request is project-y → host1 would normally be filtered.
objects := []client.Object{
&hv1.Hypervisor{
ObjectMeta: metav1.ObjectMeta{Name: "host1"},
Status: hv1.HypervisorStatus{
Aggregates: []hv1.Aggregate{{
Name: "restricted",
Metadata: map[string]string{"filter_tenant_id": "project-x"},
}},
},
},
&hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "host2"}},
}
request := api.ExternalSchedulerRequest{
Spec: api.NovaObject[api.NovaSpec]{
Data: api.NovaSpec{ProjectID: "project-y"},
},
Hosts: []api.ExternalSchedulerHost{{ComputeHost: "host1"}, {ComputeHost: "host2"}},
}
step := &FilterAggregateMetadata{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()

request.Options = scheduling.Options{SkipPlacementContextFilters: true}
result, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Activations) != 2 {
t.Errorf("expected both hosts to pass, got %d", len(result.Activations))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ type FilterAllowedProjectsStep struct {
// Note that hosts without specified projects are still accessible.
func (s *FilterAllowedProjectsStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) {
result := s.IncludeAllHostsFromRequest(request)
if request.GetOptions().SkipPlacementContextFilters {
return result, nil
}
if request.Spec.Data.ProjectID == "" {
traceLog.Info("no project ID in request, skipping filter")
return result, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"testing"

api "github.com/cobaltcore-dev/cortex/api/external/nova"
"github.com/cobaltcore-dev/cortex/api/scheduling"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -323,3 +324,35 @@ func TestFilterAllowedProjectsStep_Run(t *testing.T) {
})
}
}

func TestFilterAllowedProjectsStep_SkipPlacementContextFilters(t *testing.T) {
scheme := runtime.NewScheme()
if err := hv1.AddToScheme(scheme); err != nil {
t.Fatalf("failed to add hv1 to scheme: %v", err)
}
// host2 restricts to project-x; request is project-y → host2 would normally be filtered.
objects := []client.Object{
&hv1.Hypervisor{ObjectMeta: v1.ObjectMeta{Name: "host1"}},
&hv1.Hypervisor{
ObjectMeta: v1.ObjectMeta{Name: "host2"},
Spec: hv1.HypervisorSpec{AllowedProjects: []string{"project-x"}},
},
}
request := api.ExternalSchedulerRequest{
Spec: api.NovaObject[api.NovaSpec]{
Data: api.NovaSpec{ProjectID: "project-y"},
},
Hosts: []api.ExternalSchedulerHost{{ComputeHost: "host1"}, {ComputeHost: "host2"}},
}
step := &FilterAllowedProjectsStep{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()

request.Options = scheduling.Options{SkipPlacementContextFilters: true}
result, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Activations) != 2 {
t.Errorf("expected both hosts to pass, got %d", len(result.Activations))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type FilterExternalCustomerStep struct {
// that are not intended for external customers.
func (s *FilterExternalCustomerStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineStepResult, error) {
result := s.IncludeAllHostsFromRequest(request)
if request.GetOptions().SkipPlacementContextFilters {
return result, nil
}

// Skip for failover reservation scheduling — domain restrictions don't apply
// since failover reservations are not tied to a specific customer domain.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"testing"

api "github.com/cobaltcore-dev/cortex/api/external/nova"
"github.com/cobaltcore-dev/cortex/api/scheduling"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -492,3 +493,38 @@ func TestFilterExternalCustomerStepOpts_Validate(t *testing.T) {
})
}
}

func TestFilterExternalCustomerStep_SkipPlacementContextFilters(t *testing.T) {
scheme := runtime.NewScheme()
if err := hv1.AddToScheme(scheme); err != nil {
t.Fatalf("failed to add hv1 to scheme: %v", err)
}
// Domain matches external prefix; host1 lacks the exclusive trait → host1 would normally be filtered.
objects := []client.Object{
&hv1.Hypervisor{ObjectMeta: v1.ObjectMeta{Name: "host1"}},
&hv1.Hypervisor{
ObjectMeta: v1.ObjectMeta{Name: "host2"},
Status: hv1.HypervisorStatus{Traits: []string{"CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE"}},
},
}
request := api.ExternalSchedulerRequest{
Spec: api.NovaObject[api.NovaSpec]{
Data: api.NovaSpec{
SchedulerHints: map[string]any{"domain_name": "iaas-customer"},
},
},
Hosts: []api.ExternalSchedulerHost{{ComputeHost: "host1"}, {ComputeHost: "host2"}},
}
step := &FilterExternalCustomerStep{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
step.Options = FilterExternalCustomerStepOpts{CustomerDomainNamePrefixes: []string{"iaas-"}}

request.Options = scheduling.Options{SkipPlacementContextFilters: true}
result, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Activations) != 2 {
t.Errorf("expected both hosts to pass, got %d", len(result.Activations))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
opts := request.GetOptions()
result := s.IncludeAllHostsFromRequest(request)

// Merge call-time options with static step config.
ignoreAllocations := s.Options.IgnoreAllocations || opts.AssumeEmptyHosts
ignoredReservationTypes := slices.Concat(s.Options.IgnoredReservationTypes, opts.IgnoredReservationTypes)

// This map holds the free resources per host.
freeResourcesByHost := make(map[string]map[hv1.ResourceName]resource.Quantity)

Expand All @@ -87,7 +91,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
}

// Subtract allocated resources (skip when ignoring allocations for empty-datacenter capacity queries).
if !s.Options.IgnoreAllocations {
if !ignoreAllocations {
for resourceName, allocated := range hv.Status.Allocation {
free, ok := freeResourcesByHost[hv.Name][resourceName]
if !ok {
Expand All @@ -110,7 +114,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
}
for _, reservation := range reservations.Items {
// Check if this reservation type should be ignored — applies regardless of ready state.
if slices.Contains(s.Options.IgnoredReservationTypes, reservation.Spec.Type) {
if slices.Contains(ignoredReservationTypes, reservation.Spec.Type) {
traceLog.Debug("ignoring reservation type", "type", reservation.Spec.Type, "reservation", reservation.Name)
continue
}
Expand Down Expand Up @@ -209,7 +213,7 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
// Oversize spec-only: if a pending VM is larger than the remaining slot, block its full size.
//
// FailoverReservations: block = Spec.Resources (always fully blocked).
resourcesToBlock := resv.UnusedReservationCapacity(&reservation, s.Options.IgnoreAllocations)
resourcesToBlock := resv.UnusedReservationCapacity(&reservation, ignoreAllocations)

// Block the calculated resources on each host
for host := range hostsToBlock {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func parseMemoryToMB(memory string) uint64 {
return uint64(bytes / (1024 * 1024)) //nolint:gosec // test code
}

func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory string, evacuation bool, hosts []string) api.ExternalSchedulerRequest { //nolint:unparam // vcpus varies in real usage
func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory string, evacuation bool, hosts []string) api.ExternalSchedulerRequest {
return newNovaRequestWithIntent(instanceUUID, projectID, flavorName, flavorGroup, vcpus, memory, "", evacuation, hosts)
}

Expand Down Expand Up @@ -961,6 +961,41 @@ func TestFilterHasEnoughCapacity_IgnoredReservationTypes(t *testing.T) {
}
}

func TestFilterHasEnoughCapacity_AssumeEmptyHosts(t *testing.T) {
scheme := buildTestScheme(t)

// host1: 8 CPU total, 6 CPU allocated to running VMs → 2 free; request needs 4 → fails normally.
// With AssumeEmptyHosts: allocations ignored → 8 free → passes.
hypervisors := []*hv1.Hypervisor{
newHypervisor("host1", "8", "6", "32Gi", "0"),
}
request := newNovaRequest("vm", "proj", "m1.large", "gp", 4, "1Gi", false, []string{"host1"})

objects := make([]client.Object, 0, len(hypervisors))
for _, h := range hypervisors {
objects = append(objects, h.DeepCopy())
}

step := &FilterHasEnoughCapacity{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
step.Options = FilterHasEnoughCapacityOpts{}

// Without AssumeEmptyHosts: host1 filtered (only 2 CPU free).
resultWithout, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
assertActivations(t, resultWithout.Activations, []string{}, []string{"host1"})

// With AssumeEmptyHosts via call-time options: allocations ignored → host1 passes.
request.Options = scheduling.Options{AssumeEmptyHosts: true}
resultWith, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
assertActivations(t, resultWith.Activations, []string{"host1"}, []string{})
}

func TestFilterHasEnoughCapacity_IgnoredReservationTypes_CallTime(t *testing.T) {
scheme := buildTestScheme(t)

Expand All @@ -986,7 +1021,7 @@ func TestFilterHasEnoughCapacity_IgnoredReservationTypes_CallTime(t *testing.T)

step := &FilterHasEnoughCapacity{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
// Ignore CR reservations via pipeline-level opts (call-time opts.IgnoredReservationTypes removed in favour of YAML params).
// Ignore CR reservations via YAML step opts (static, configured per pipeline step).
step.Options = FilterHasEnoughCapacityOpts{
LockReserved: true,
IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
Expand All @@ -999,6 +1034,49 @@ func TestFilterHasEnoughCapacity_IgnoredReservationTypes_CallTime(t *testing.T)
assertActivations(t, result.Activations, []string{"host1"}, []string{"host2"})
}

func TestFilterHasEnoughCapacity_IgnoredReservationTypes_CallTimeMerge(t *testing.T) {
scheme := buildTestScheme(t)

// Same two-host setup: CR on host1, Failover on host2.
// Each blocks 4 CPU, leaving 4 free; request needs 8 CPU so both hosts fail without ignoring.
// Verify that call-time opts.IgnoredReservationTypes is merged with (not instead of) YAML opts.
hypervisors := []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"),
newHypervisor("host2", "16", "8", "32Gi", "16Gi"),
}
reservations := []*v1alpha1.Reservation{
newCommittedReservation("cr-res", "host1", "project-X", "m1.large", "gp-1", "4", "8Gi", nil, nil),
newFailoverReservation("failover-res", "host2", "4", "8Gi", map[string]string{"other-vm": "host3"}),
}
request := newNovaRequest("instance-123", "project-A", "m1.large", "gp-1", 8, "16Gi", false, []string{"host1", "host2"})

objects := make([]client.Object, 0, len(hypervisors)+len(reservations))
for _, h := range hypervisors {
objects = append(objects, h.DeepCopy())
}
for _, r := range reservations {
objects = append(objects, r.DeepCopy())
}

step := &FilterHasEnoughCapacity{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
// YAML opts ignore CR; call-time opts ignore Failover — both must be respected.
step.Options = FilterHasEnoughCapacityOpts{
LockReserved: true,
IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
}
request.Options = scheduling.Options{
IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeFailover},
}

result, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
// Both CR (host1) and Failover (host2) reservations ignored → both hosts pass.
assertActivations(t, result.Activations, []string{"host1", "host2"}, []string{})
}

func TestFilterHasEnoughCapacity_ReserveForCommittedResourceIntent(t *testing.T) {
scheme := buildTestScheme(t)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ func (s *FilterInstanceGroupAffinityStep) Run(
) (*lib.FilterWeigherPipelineStepResult, error) {

result := s.IncludeAllHostsFromRequest(request)
if request.GetOptions().SkipPlacementContextFilters {
return result, nil
}

ig := request.Spec.Data.InstanceGroup
if ig == nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"testing"

api "github.com/cobaltcore-dev/cortex/api/external/nova"
"github.com/cobaltcore-dev/cortex/api/scheduling"
)

func TestFilterInstanceGroupAffinityStep_Run(t *testing.T) {
Expand Down Expand Up @@ -352,3 +353,21 @@ func TestFilterInstanceGroupAffinityStep_Run(t *testing.T) {
})
}
}

func TestFilterInstanceGroupAffinityStep_SkipPlacementContextFilters(t *testing.T) {
// Affinity group on host1 only — host2 would normally be filtered.
request := newNovaRequest("vm", "proj", "m1.small", "gp", 1, "1Gi", false, []string{"host1", "host2"})
request.Spec.Data.InstanceGroup = &api.NovaObject[api.NovaInstanceGroup]{
Data: api.NovaInstanceGroup{Policy: "affinity", Hosts: []string{"host1"}},
}
step := &FilterInstanceGroupAffinityStep{}

request.Options = scheduling.Options{SkipPlacementContextFilters: true}
result, err := step.Run(slog.Default(), request)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Activations) != 2 {
t.Errorf("expected both hosts to pass, got %d", len(result.Activations))
}
}
Loading