import os
from gcore import Gcore
client = Gcore(
api_key=os.environ.get("GCORE_API_KEY"), # This is the default and can be omitted
)
page = client.cloud.inference.deployments.list(
project_id=1,
)
page = page.results[0]
print(page.project_id)

{
"count": 1,
"results": [
{
"address": "https://example.com",
"auth_enabled": true,
"command": "<string>",
"containers": [
{
"address": "https://example.com",
"deploy_status": {
"ready": 123,
"total": 123
},
"error_message": "Failed to pull image",
"region_id": 123,
"scale": {
"cooldown_period": 60,
"max": 123,
"min": 123,
"polling_interval": 30,
"triggers": {
"cpu": {
"threshold": 80
},
"gpu_memory": {
"threshold": 80
},
"gpu_utilization": {
"threshold": 80
},
"http": {
"rate": 1,
"window": 60
},
"memory": {
"threshold": 80
},
"sqs": {
"activation_queue_length": 123,
"aws_endpoint": "<string>",
"aws_region": "<string>",
"queue_length": 123,
"queue_url": "<string>",
"scale_on_delayed": true,
"scale_on_flight": true,
"secret_name": "<string>"
}
}
}
}
],
"created_at": "2023-08-22T11:21:00Z",
"credentials_name": "<string>",
"description": "<string>",
"envs": {
"DEBUG_MODE": "False",
"KEY": "12345"
},
"flavor_name": "<string>",
"image": "<string>",
"ingress_opts": {
"disable_response_buffering": true
},
"listening_port": 123,
"logging": {
"destination_region_id": 1,
"enabled": true,
"retention_policy": {
"period": 45
},
"topic_name": "my-log-name"
},
"name": "<string>",
"object_references": [
{
"kind": "AppDeployment",
"name": "<string>"
}
],
"probes": {
"liveness_probe": {
"enabled": true,
"probe": {
"exec": {
"command": [
"<string>"
]
},
"failure_threshold": 123,
"http_get": {
"headers": {},
"host": "127.0.0.1",
"path": "<string>",
"port": 123,
"schema": "<string>"
},
"initial_delay_seconds": 123,
"period_seconds": 123,
"success_threshold": 123,
"tcp_socket": {
"port": 123
},
"timeout_seconds": 123
}
},
"readiness_probe": {
"enabled": true,
"probe": {
"exec": {
"command": [
"<string>"
]
},
"failure_threshold": 123,
"http_get": {
"headers": {},
"host": "127.0.0.1",
"path": "<string>",
"port": 123,
"schema": "<string>"
},
"initial_delay_seconds": 123,
"period_seconds": 123,
"success_threshold": 123,
"tcp_socket": {
"port": 123
},
"timeout_seconds": 123
}
},
"startup_probe": {
"enabled": true,
"probe": {
"exec": {
"command": [
"<string>"
]
},
"failure_threshold": 123,
"http_get": {
"headers": {},
"host": "127.0.0.1",
"path": "<string>",
"port": 123,
"schema": "<string>"
},
"initial_delay_seconds": 123,
"period_seconds": 123,
"success_threshold": 123,
"tcp_socket": {
"port": 123
},
"timeout_seconds": 123
}
}
},
"project_id": 123,
"status": "ACTIVE",
"timeout": 120,
"api_keys": [
"key1",
"key2"
]
}
]
}

Request parameters

Authorization header: API key for authentication. Make sure to include the word apikey, followed by a single space and then your token. Example: apikey 1234$abcdef
project_id: Project ID
limit: Optional. Limit the number of returned items. Constraint: x <= 1000
offset: Optional. Offset value is used to exclude the first set of records from the result. Constraint: x >= 0
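
The limit and offset parameters above page through larger collections. Below is a minimal sketch that walks the whole collection in fixed-size chunks; passing limit and offset as keyword arguments to list() mirrors the query parameters documented here and is an assumption about the SDK surface, so adjust the names if your SDK version differs.

import os
from gcore import Gcore

client = Gcore(
    api_key=os.environ.get("GCORE_API_KEY"),  # read from the environment by default
)

limit = 100   # must not exceed 1000, per the limit constraint above
offset = 0
while True:
    # Assumption: the SDK forwards limit/offset to the query string under the same names.
    page = client.cloud.inference.deployments.list(
        project_id=1,
        limit=limit,
        offset=offset,
    )
    if not page.results:
        break
    for deployment in page.results:
        print(deployment.name, deployment.status)
    offset += limit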

Response (200 OK)

count: Number of objects. Constraint: x >= 0
results: Objects. Each object describes one inference deployment and has the attributes below.

address: Address of the inference instance. Example: "https://example.com"
auth_enabled: true if the instance uses API key authentication. An "Authorization": "Bearer *****" or "X-Api-Key": "*****" header is required for requests to the instance if enabled.
command: Command to be executed when running a container from an image.
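
When auth_enabled is true, plain requests to the instance address are rejected; the call must carry one of the headers named above. A minimal sketch, assuming the third-party requests library is installed and reusing the address and api_keys fields from this response (the key value shown is a placeholder):

import requests  # third-party HTTP client; any client that sets headers works
from gcore import Gcore

client = Gcore()  # reads GCORE_API_KEY from the environment by default

deployment = client.cloud.inference.deployments.list(project_id=1).results[0]

headers = {}
if deployment.auth_enabled:
    # Either form is accepted by the instance when API key auth is enabled:
    #   Authorization: Bearer <key>   or   X-Api-Key: <key>
    headers["X-Api-Key"] = "<one of the keys from deployment.api_keys>"

resp = requests.get(deployment.address, headers=headers, timeout=30)
print(resp.status_code)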
containers: List of containers for the inference instance. Each container has the following attributes.
  address: Address of the inference instance. Example: "https://example.com"
  error_message: Error message if the container deployment failed. Example: "Failed to pull image"
  region_id: Region name for the container
  scale: Scale for the container
    cooldown_period: Cooldown period between scaling actions in seconds. Example: 60
    max: Maximum scale for the container
    min: Minimum scale for the container
    polling_interval: Polling interval for scaling triggers in seconds. Example: 30
    triggers: Triggers for scaling actions
      gpu_memory: GPU memory trigger configuration. Calculated by DCGM_FI_DEV_MEM_COPY_UTIL metric. Example: { "threshold": 80 }
        threshold: Threshold value for the trigger in percentage
      gpu_utilization: GPU utilization trigger configuration. Calculated by DCGM_FI_DEV_GPU_UTIL metric. Example: { "threshold": 80 }
        threshold: Threshold value for the trigger in percentage
      http: HTTP trigger configuration. Example: { "rate": 1, "window": 60 }
      sqs: SQS trigger configuration
        activation_queue_length: Number of messages for activation
        aws_endpoint: Custom AWS endpoint
        aws_region: AWS region
        queue_length: Number of messages for one replica
        queue_url: SQS queue URL
        scale_on_delayed: Scale on delayed messages
        scale_on_flight: Scale on in-flight messages
        secret_name: Auth secret name
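
Putting the container fields together, the sketch below reports per-region rollout progress and the configured scaling bounds. It assumes the SDK exposes the response fields above as attributes with the same names as in the JSON example.

from gcore import Gcore

client = Gcore()  # reads GCORE_API_KEY from the environment by default

deployment = client.cloud.inference.deployments.list(project_id=1).results[0]

for container in deployment.containers:
    ready = container.deploy_status.ready
    total = container.deploy_status.total
    print(f"region {container.region_id}: {ready}/{total} replicas ready")
    if container.error_message:
        print(f"  error: {container.error_message}")
    scale = container.scale
    print(f"  scale {scale.min}-{scale.max}, "
          f"cooldown {scale.cooldown_period}s, polling every {scale.polling_interval}s")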
created_at: Inference instance creation date in ISO 8601 format. Example: "2023-08-22T11:21:00Z"
credentials_name: Registry credentials name
description: Inference instance description.
flavor_name: Flavor name for the inference instance
image: Docker image for the inference instance. This field should contain the image name and tag in the format 'name:tag', e.g., 'nginx:latest'. It defaults to Docker Hub as the image registry, but any accessible Docker image URL can be specified.
ingress_opts: Ingress options for the inference instance. Example: { "disable_response_buffering": true }
  disable_response_buffering: Disable response buffering if true. A client usually has a much slower connection and can not consume the response data as fast as it is produced by an upstream application. Ingress tries to buffer the whole response in order to release the upstream application as soon as possible. By default, the response buffering is enabled.
listening_port: Listening port for the inference instance.
logging: Logging configuration for the inference instance
  destination_region_id: ID of the region in which the logs will be stored. Example: 1
  enabled: Indicates if log streaming is enabled or disabled
  topic_name: The topic name to stream logs to. Example: "my-log-name"
  Example:
  {
    "destination_region_id": 1,
    "enabled": true,
    "retention_policy": { "period": 45 },
    "topic_name": "my-log-name"
  }

name: Inference instance name.
probes: Probes configured for all containers of the inference instance. Contains liveness_probe (Liveness probe configuration), readiness_probe (Readiness probe configuration) and startup_probe (Startup probe configuration); each entry has the same structure:
  enabled: Whether the probe is enabled or not.
  probe: Probe configuration (exec, http_get or tcp_socket)
    failure_threshold: The number of consecutive probe failures that mark the container as unhealthy.
    http_get: HTTP GET probe configuration
      host: Host name to send HTTP request to. Example: "127.0.0.1"
      path: The endpoint to send the HTTP request to.
      port: Port number the probe should connect to.
      schema: Schema to use for the HTTP request.
    initial_delay_seconds: The initial delay before starting the first probe.
    period_seconds: How often (in seconds) to perform the probe.
    success_threshold: The number of consecutive successful probes that mark the container as healthy.
    timeout_seconds: The timeout for each probe.
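
A small sketch for inspecting which handler each probe uses. It assumes exactly one of exec, http_get or tcp_socket is populated per probe and that the SDK keeps the field names from the response; the exec attribute in particular may be exposed under a different name in some SDK versions.

from gcore import Gcore

client = Gcore()  # reads GCORE_API_KEY from the environment by default

deployment = client.cloud.inference.deployments.list(project_id=1).results[0]
probes = deployment.probes

for label, entry in (
    ("liveness", probes.liveness_probe),
    ("readiness", probes.readiness_probe),
    ("startup", probes.startup_probe),
):
    if entry is None or not entry.enabled:
        print(f"{label}: disabled")
        continue
    probe = entry.probe
    if probe.http_get is not None:
        print(f"{label}: HTTP GET {probe.http_get.path} on port {probe.http_get.port}")
    elif probe.tcp_socket is not None:
        print(f"{label}: TCP check on port {probe.tcp_socket.port}")
    else:
        print(f"{label}: exec command {probe.exec.command}")  # attribute name assumed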

project_id: Project ID. If not provided, your default project ID will be used.

status: Inference instance status. Value can be one of the following:
DEPLOYING - The instance is being deployed. Containers are not yet created.
PARTIALLYDEPLOYED - All containers have been created, but some may not be ready yet. Instances stuck in this state typically indicate either an image still being pulled or a failure of some kind. In the latter case, the error_message field of the respective container object in the containers collection explains the failure reason.
ACTIVE - The instance is running and ready to accept requests.
DISABLED - The instance is disabled and not accepting any requests.
PENDING - The instance is running but scaled to zero. It will be automatically scaled up when a request is made.
DELETING - The instance is being deleted.
Allowed values: ACTIVE, DELETING, DEPLOYING, DISABLED, PARTIALLYDEPLOYED, PENDING

timeout: Specifies the duration in seconds without any requests after which the containers will be downscaled to their minimum scale value as defined by scale.min. If set, this helps in optimizing resource usage by reducing the number of container instances during periods of inactivity. Constraint: x >= 0. Example: 120

api_keys: List of API keys for the inference instance. Example: ["key1", "key2"]