Opencost系列-03安装部署

  • Published
  • Posted in Cloud
  • 9 mins read
  • Updated
  • 0 Comments

1.准备工作

⚠️ 安装过程中所需image需上传至私有harbor仓库。

# 由于国内网络环境问题,请直接下载prometheus chart包,并解压。
https://github.com/prometheus-community/helm-charts
unzip helm-charts-main.zip
 
# 下载opencost的scrape_job(用于opencost集成到prometheus,为prometheus提供计费数据)
# 可直接加入到prometheus的values.yaml文件当中,不需要单独用-f引用
https://raw.githubusercontent.com/opencost/opencost/develop/kubernetes/prometheus/extraScrapeConfigs.yaml

# 由于国内网络环境问题,请直接下载opencost chart包,并解压。
https://github.com/opencost/opencost-helm-chart
unzip opencost-helm-chart-main.zip

 

2.通过helm安装prometheus

# 配置
# 进入chart目录
cd helm-charts-main/charts/prometheus

# 修改yaml,参见4.2小节
vi values.yaml

# 由于prometheus安装时缺省需要依赖,所以从上一级目录把4个依赖拷到当前chart的charts/下
cp -r ../alertmanager ../kube-state-metrics ../prometheus-node-exporter ../prometheus-pushgateway charts/

# Install Prometheus via Helm (edit the values.yaml from appendix 4.2 BEFORE installing).
# NOTE: each continued line must end with a backslash — without it the shell treats
# the option lines as separate commands and the install fails.
helm install prometheus . \
  --namespace prometheus-system \
  --create-namespace \
  --set prometheus-pushgateway.enabled=false \
  --set alertmanager.enabled=false \
  --set global.imageRegistry=10.118.17.28:30002 \
  -f values.yaml

# 检查
# 运行以下命令,检查Helm release是否成功安装:
helm list --namespace prometheus-system

# 查看Prometheus的相关Pods是否已经启动,并且没有出现错误:
kubectl get pods -n prometheus-system

# 查看Prometheus服务是否创建,并且能够正确访问:
kubectl get svc -n prometheus-system
NAME                                  TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)    AGE
prometheus-kube-state-metrics         ClusterIP   10.87.112.9    <none>        8080/TCP   56m
prometheus-prometheus-node-exporter   ClusterIP   10.87.156.62   <none>        9100/TCP   56m
prometheus-server                     ClusterIP   10.93.219.51   <none>        80/TCP     56m

# 如果你遇到任何问题,可以查看Prometheus Pods的日志,检查是否有错误或警告信息。
kubectl logs -n prometheus-system <prometheus-server-pod-name>

# 查看configmap
kubectl describe configmap prometheus-server -n prometheus-system

# 其它
# 删除
helm uninstall prometheus --namespace prometheus-system

# 重启 Prometheus server
kubectl rollout restart deployment prometheus-server -n prometheus-system

 

3.通过helm安装opencost

# 配置
cd opencost-helm-chart-main/charts/opencost

# 安装opencost(请在安装前修改好values.yaml文件,参看附录部分4.3)
helm install opencost . --namespace opencost

# 通过nodeport临时暴露出端口来供访问(#可以在values.yaml文件中固化)
kubectl -n opencost patch svc opencost -p '{"spec":{"type":"NodePort"}}'

# 检查
helm list --namespace opencost
kubectl get svc opencost -n opencost
kubectl get pods -n opencost

# 其它
helm upgrade opencost . --namespace opencost

helm uninstall opencost --namespace opencost

 

4.附录

⚠️ 1.所有没有在values.yaml文件中标注image tag的镜像,可以去对应目录下的Chart.yaml中去获取具体的image tag,也就是image的版本。

4.1 extraScrapeConfigs.yaml 文件

将此文件的内容粘贴至4.2 的values.yaml文件中的extraScrapeConfigs: 部分

# Scrape job handed to Prometheus for OpenCost billing metrics.
# The value is a literal block scalar (|): its content is passed through as a
# raw string and parsed by Prometheus itself, so do not add comments inside it.
# dns_sd_configs resolves the in-cluster A record "opencost.opencost"
# (service "opencost" in namespace "opencost") and scrapes /metrics on
# port 9003 once per minute.
extraScrapeConfigs: |
  - job_name: opencost
    honor_labels: true
    scrape_interval: 1m
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    dns_sd_configs:
    - names:
      - opencost.opencost
      type: 'A'
      port: 9003

 

4.2 prometheus的values.yaml文件

#=======================================
#  helm安装下的prometheus的values.yaml文件
#=======================================
# Note:非完整文件,主要显示标红的修改部分。
# 1.image path modify
# 2.nodePort for prometheus
# 3.add ht-explorer
... ...   ... ...   
configmapReload:
  ## URL for configmap-reload to use for reloads
  ##
  reloadUrl: ""

  ## env sets environment variables to pass to the container. Can be set as name/value pairs,
  ## read from secrets or configmaps.
  env: []
    # - name: SOMEVAR
    #   value: somevalue
    # - name: PASSWORD
    #   valueFrom:
    #     secretKeyRef:
    #       name: mysecret
    #       key: password
    #       optional: false

  prometheus:
    ## If false, the configmap-reload container will not be deployed
    ##
    enabled: true

    ## configmap-reload container name
    ##
    name: configmap-reload

    ## configmap-reload container image
    image:
      repository: 10.118.17.28:30002/prometheus/prometheus-config-reloader
      tag: v0.86.2
      # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
      digest: ""
      pullPolicy: IfNotPresent

    ## config-reloader's container port and port name for probes and metrics
    containerPort: 8080
    containerPortName: metrics
... ...   ... ...   
server:
  ## Prometheus server container name
  ##
  name: server
  ## Prometheus server container image
  ##
  image:
    repository: 10.118.17.28:30002/prometheus/prometheus
    # if not set appVersion field from Chart.yaml is used
    tag: ""
    # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
    digest: ""
    pullPolicy: IfNotPresent

  ## Prometheus server command
  ##
  command: []
  ... ...   ... ...
  service:
    ## If false, no Service will be created for the Prometheus server
    ##
    enabled: true
    annotations: {}
    labels: {}
    clusterIP: ""
    externalIPs: []

    loadBalancerIP: ""
    loadBalancerSourceRanges: []
    servicePort: 80
    sessionAffinity: None
    type: NodePort
    nodePort: 30090
  ... ...   ... ...
      # NOTE(review): if kube-state-metrics and node-exporter are not given
      # dedicated scrape jobs, this endpoints-based job should discover them
      # dynamically via their service annotations — TODO confirm.
      - job_name: 'kubernetes-service-endpoints'
        honor_labels: true

        kubernetes_sd_configs:
          - role: endpoints

        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
  ... ...   ... ...
# Extra scrape jobs appended verbatim to the generated prometheus.yml.
# The value is a literal block scalar (|): comments must stay outside it,
# otherwise they become part of the string handed to Prometheus.
# Jobs defined below:
#   - ht-exporter: pod discovery restricted to namespace prometheus-system;
#     keeps only pods labeled app=ht-exporter, rewrites the scrape address to
#     <pod-ip>:8002, and copies the pod's node name into a "node" label.
#   - opencost: DNS A-record discovery of opencost.opencost:9003, scraped
#     every minute (same job as appendix 4.1).
extraScrapeConfigs: |
  # ht-exporter
  - job_name: 'ht-exporter'
    kubernetes_sd_configs:
    - role: pod
      namespaces:
        names:
        - prometheus-system
    relabel_configs:
    - source_labels: [__meta_kubernetes_pod_label_app]
      action: keep
      regex: ht-exporter
    - source_labels: [__meta_kubernetes_pod_ip]
      target_label: __address__
      replacement: $1:8002
    - source_labels: [__meta_kubernetes_pod_node_name]
      target_label: node
  # opencost
  - job_name: opencost
    honor_labels: true
    scrape_interval: 1m
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    dns_sd_configs:
    - names:
      - opencost.opencost
      type: 'A'
      port: 9003
... ... ... ...

 

4.3 opencost的values.yaml文件

#========================
#  opencost的values.yaml文件
#========================
# 1. 修改3处image路径

# -- Overwrite the default name of the chart
nameOverride: ""
# -- Overwrite all resources name created by the chart
fullnameOverride: ""
# -- Override the deployment namespace
namespaceOverride: ""
# -- Override the default name of cluster - Can be found in /etc/kubernetes/admin.conf: clusters -> cluster -> name
clusterName: "cluster.local"

loglevel: info

plugins:
  enabled: false
  install:
    enabled: true
    fullImageName: 10.118.17.28:30002/opencost/curl:latest
    securityContext:
      allowPrivilegeEscalation: false
      seccompProfile:
        type: RuntimeDefault
      capabilities:
        drop:
        - ALL
      readOnlyRootFilesystem: true
      runAsNonRoot: true
      runAsUser: 1000
... ...
# 修改以什么方式暴露服务
service:
  enabled: true
  # --  Annotations to add to the service
  annotations: {}
  # --  Labels to add to the service account
  labels: {}
  # --  Kubernetes Service type
  type: ClusterIP
  # -- NodePort if service type is NodePort
  nodePort: {}
  # -- extra ports.  Useful for sidecar pods such as oauth-proxy
  extraPorts: []
    # - name: oauth-proxy
    #   port: 8081
    #   targetPort: 8081
    # - name: oauth-metrics
    #   port: 8082
    #   targetPort: 8082
  # -- LoadBalancer Source IP CIDR if service type is LoadBalancer and cloud provider supports this
  loadBalancerSourceRanges: []
... ...
opencost:
... ...   ... ...
  exporter:
    # API_PORT for the cost-model to listen on
    apiPort: 9003
    # debugPort: 40000 # for development purposes (debugging with delve) and not for production.
    # -- The GCP Pricing API requires a key. This is supplied just for evaluation.
    cloudProviderApiKey: ""
    # -- Default cluster ID to use if cluster_id is not set in Prometheus metrics.
    defaultClusterId: 'default-cluster'
    # -- If clusterIdConfigmap is defined, use user-generated ConfigMap with key CLUSTER_ID as default cluster ID.
    # -- This overrides the above defaultClusterId. Ensure the ConfigMap exists and contains the required CLUSTER_ID key.
    # clusterIdConfigmap: cluster-id-configmap
    image:
      # -- Exporter container image registry
      registry: 10.118.17.28:30002
      # -- Exporter container image name
      repository: opencost/opencost
      # -- Exporter container image tag
      tag: "1.118.0"
      # -- Exporter container image pull policy
      pullPolicy: IfNotPresent
      # -- Override the full image name for development purposes
      fullImageName: null
    # -- List of extra arguments for the command, e.g.: log-format=json
    extraArgs: []
    # -- Optional command to override the default container command
    command: []
    # -- Number of OpenCost replicas to run
    replicas: 1
    resources:
      # -- CPU/Memory resource requests
      requests:
        cpu: '10m'
        memory: '55Mi'
      # -- CPU/Memory resource limits
      limits:
        memory: '1Gi'
... ...   ... ...
    # Persistent volume claim for storing the data. eg: csv file
# 修改可以挂载永久存储
    persistence:
      enabled: false
      # -- The path that the PV will be mounted to the exporter at
      mountPath: /mnt/export
      # -- Annotations for persistent volume
      annotations: {}
      # -- Access mode for persistent volume
      accessMode: ""
      # -- Storage class for persistent volume
      storageClass: ""
      # -- Size for persistent volume
      size: ""
... ...
# 通过修改这部分可以自定义价格。
  customPricing:
    # -- Enables custom pricing configuration
    enabled: true
    # -- Customize the configmap name used for custom pricing
    configmapName: custom-pricing-model
    # -- Path for the pricing configuration.
    configPath: /tmp/custom-config
    # -- Configures the pricing model provided in the values file.
    createConfigmap: true
    # -- Sets the provider type for the custom pricing file.
    provider: custom
    # -- More information about these values here: https://www.opencost.io/docs/configuration/on-prem#custom-pricing-using-the-opencost-helm-chart
    costModel:
      description: Modified pricing configuration.
      CPU: 1.25
      spotCPU: 0.006655
      RAM: 0.50
      spotRAM: 0.000892
      GPU: 0.95
      storage: 0.25
      zoneNetworkEgress: 0.01
      regionNetworkEgress: 0.01
      internetNetworkEgress: 0.12

  retention1d: 15
  retention1h: 49

# 使能碳排放成本
  carbonCost:
    # -- Enable carbon cost exposed in the API
    enabled: true
... ...   ... ...
  metrics:
    kubeStateMetrics:
      # -- (bool) Enable emission of pod annotations
      emitPodAnnotations: ~
      # -- (bool) Enable emission of namespace annotations
      emitNamespaceAnnotations: ~
      # -- (bool) Enable emission of KSM v1 metrics
      emitKsmV1Metrics: ~
      # -- (bool) Enable only emission of KSM v1 metrics that do not exist in KSM 2 by default
      emitKsmV1MetricsOnly: ~
    ... ...   ... ...
    config:
      # -- Enables creating the metrics.json configuration as a ConfigMap
      enabled: false
      # -- Customize the configmap name used for metrics
      configmapName: custom-metrics
      # -- List of metrics to be disabled
      disabledMetrics: []
      # - <metric-to-be-disabled>
      # - <metric-to-be-disabled>
# opencost能成功抓取prometheus的数据在于这块的配置。
  prometheus:
    # -- Secret name that contains credentials for Prometheus
    secret_name: ~
    # -- Existing secret name that contains credentials for Prometheus
    existingSecretName: ~
    # -- Prometheus Basic auth username
    username: ""
    # -- Key in the secret that references the username
    username_key: DB_BASIC_AUTH_USERNAME
    # -- Prometheus Basic auth password
    password: ""
    # -- Key in the secret that references the password
    password_key: DB_BASIC_AUTH_PW
    # -- Prometheus Bearer token
    bearer_token: ""
    bearer_token_key: DB_BEARER_TOKEN
    # -- If true, opencost will use kube-rbac-proxy to authenticate with in cluster Prometheus for openshift
    kubeRBACProxy: false
    # -- Whether to disable SSL certificate verification
    insecureSkipVerify: false
    external:
      # -- Use external Prometheus (eg. Grafana Cloud)
      enabled: false
      # -- External Prometheus url
      url: "https://prometheus.example.com/prometheus"
    internal:
      # -- Use in-cluster Prometheus
      enabled: true
      # -- Service name of in-cluster Prometheus
      serviceName: prometheus-server
      # -- Namespace of in-cluster Prometheus
      namespaceName: prometheus-system
      # -- Service port of in-cluster Prometheus
      port: 80
      # -- Path to access the Prometheus API, this is neccesary if the Prometheus server is behind a reverse proxy(mimir) or has a different path.
      path: ""
      # -- Scheme to use for in-cluster Prometheus
      scheme: http
    amp:
      # -- Use Amazon Managed Service for Prometheus (AMP)
      enabled: false # If true, opencost will be configured to remote_write and query from Amazon Managed Service for Prometheus.
      # -- Workspace ID for AMP
      workspaceId: ""
    thanos:
      enabled: false
      queryOffset: ''
      maxSourceResolution: ''
      internal:
        enabled: true
        serviceName: my-thanos-query
        namespaceName: opencost
        port: 10901
        scheme: http
      external:
        enabled: false
        url: 'https://thanos-query.example.com/thanos'

  ui:
    # -- Enable OpenCost UI
    enabled: true
    image:
      # -- UI container image registry
      registry: 10.118.17.28:30002
      # -- UI container image name
      repository: opencost/opencost-ui
      # -- UI container image tag
      # @default -- `""` (use appVersion in Chart.yaml)
      tag: "1.118.0"
      # -- UI container image pull policy
      pullPolicy: IfNotPresent
      # -- Override the full image name for development purposes
      fullImageName: null
    resources:
      # -- CPU/Memory resource requests
      requests:
        cpu: '10m'
        memory: '55Mi'
      # -- CPU/Memory resource limits
      limits:
        memory: '1Gi'
... ...   ... ...
extraVolumes: []

 

4.4 GPU测试用例

# Dedicated namespace for the GPU billing test workload.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-test
---
# Minimal workload that claims one GPU and idles, so OpenCost has a running
# GPU consumer to attribute cost to.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gpu-sleeper
  namespace: gpu-test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gpu-sleeper
  template:
    metadata:
      labels:
        app: gpu-sleeper
    spec:
      # Pin to one specific worker; assumes that node carries the vendor label
      # mars-tech.com/gpu.installed=true — confirm against the cluster's nodes.
      nodeSelector:
        kubernetes.io/hostname: k8s-worker-26
        mars-tech.com/gpu.installed: "true"
      containers:
      - name: gpu-sleeper
        image: 10.118.17.28:30002/linux/busybox:1.36
        # Keep the container alive (~100h) without doing any work.
        command: ["sh", "-c", "sleep 360000"]
        resources:
          # mars-tech.com/gpu is an extended resource; Kubernetes requires
          # requests == limits for extended resources (no overcommit).
          requests:
            mars-tech.com/gpu: "1"
          limits:
            mars-tech.com/gpu: "1"

 

Leave a Reply