
Stackable

Table of Contents
1. Introduction
2. Installation
3. Demo 1: Trino-Taxi-Data
4. Getting Started


 

Stackable Data Platform (SDP) : lets you easily deploy and manage a wide range of open-source data applications (Apache Spark, Apache Superset, Apache HBase, Apache Airflow, and more) on Kubernetes.

 

  • Operators provided for data products - Link
    • Airflow, Druid, HBase, Hadoop HDFS, Hive, Kafka, NiFi, Spark, Superset, Trino, ZooKeeper

 

One-Click EKS Deployment

 v1.26 - CloudFormation_Link

  • The myeks-bastion EC2 uses Ubuntu 22.04 instead of the usual AL2 → log in as the ubuntu user
  • Worker nodes: instance type c5.2xlarge, with the batcat tool installed
# Download the YAML file
curl -O https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/EKS/eks-oneclick-2.yaml

# Deploy the CloudFormation stack
# aws cloudformation deploy --template-file eks-oneclick-2.yaml --stack-name myeks --parameter-overrides KeyName=<My SSH Keyname> SgIngressSshCidr=<My Home Public IP Address>/32 MyIamUserAccessKeyID=<IAM user's access key> MyIamUserSecretAccessKey=<IAM user's secret key> ClusterBaseName='<EKS cluster name>' --region ap-northeast-2
Example) aws cloudformation deploy --template-file eks-oneclick-2.yaml --stack-name myeks --parameter-overrides KeyName=kp-gasida SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32  MyIamUserAccessKeyID=AKIA5... MyIamUserSecretAccessKey='CVNa2...' ClusterBaseName=myeks --region ap-northeast-2

# After the stack deployment completes, print the working EC2 IP
aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text

# SSH into the working EC2 instance
ssh -i ~/.ssh/kp-gasida.pem ubuntu@$(aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text)

-----------------
# batcat
## echo "alias cat='batcat --paging=never'" has already been added to /etc/profile, so cat below is actually batcat
cat precmd.yaml
cat precmd.yaml -p
cat precmd.yaml -n
cat /var/log/syslog
batcat -h
batcat -L

# Check node info : v1.26, c5.2xlarge
kubectl get node --label-columns=node.kubernetes.io/instance-type
NAME                                               STATUS   ROLES    AGE   VERSION                INSTANCE-TYPE
ip-192-168-1-37.ap-northeast-2.compute.internal    Ready    <none>   42m   v1.26.10-eks-e71965b   c5.2xlarge
ip-192-168-2-253.ap-northeast-2.compute.internal   Ready    <none>   42m   v1.26.10-eks-e71965b   c5.2xlarge
ip-192-168-3-207.ap-northeast-2.compute.internal   Ready    <none>   42m   v1.26.10-eks-e71965b   c5.2xlarge

 

 

Basic setup commands

# Set node private IP variables
N1=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2a -o jsonpath={.items[0].status.addresses[0].address})
N2=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2b -o jsonpath={.items[0].status.addresses[0].address})
N3=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2c -o jsonpath={.items[0].status.addresses[0].address})
echo "export N1=$N1" >> /etc/profile
echo "export N2=$N2" >> /etc/profile
echo "export N3=$N3" >> /etc/profile
echo $N1, $N2, $N3

# Add a rule to the node security group so eksctl-host can reach the nodes (pods)
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32

# AWS LoadBalancer Controller
helm repo add eks https://aws.github.io/eks-charts
helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \
  --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller

# Install the ExternalDNS controller
MyDomain=<your domain>
echo "export MyDomain=<your domain>" >> /etc/profile
MyDomain=gasida.link
echo "export MyDomain=gasida.link" >> /etc/profile
MyDnsHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
echo $MyDomain, $MyDnsHostedZoneId
curl -s -O https://raw.githubusercontent.com/cloudneta/cnaeblab/master/_data/externaldns.yaml
MyDomain=$MyDomain MyDnsHostedZoneId=$MyDnsHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -

# kube-ops-view
helm repo add geek-cookbook https://geek-cookbook.github.io/charts/
helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"
echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=4.0"

# Create an EBS gp3 storage class : xfs filesystem
kubectl patch sc gp2 -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'
kubectl apply -f https://raw.githubusercontent.com/gasida/DOIK/main/1/gp3-sc-xfs.yaml

# Create an EFS storage class : for lab convenience, keep the default reclaim policy (Delete)
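## $EFS_ID below is assumed to be pre-populated by the one-click CloudFormation stack.
## If it is empty, a hedged lookup (assumes a single EFS file system in this account/region):
EFS_ID=${EFS_ID:-$(aws efs describe-file-systems --query "FileSystems[0].FileSystemId" --output text)}
echo $EFS_ID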
cat <<EOT > efs-sc.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata: 
  name: efs-sc
provisioner: efs.csi.aws.com
parameters: 
  provisioningMode: efs-ap
  fileSystemId: $EFS_ID
  directoryPerms: "700"
EOT
kubectl apply -f efs-sc.yaml

# Create the Prometheus stack
kubectl create ns monitoring
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`
## Create the values file
cat <<EOT > monitor-values.yaml
prometheus:
  prometheusSpec:
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelectorNilUsesHelmValues: false
    #probeSelectorNilUsesHelmValues: false
    retention: 5d
    retentionSize: "10GiB"
    scrapeInterval: '15s'
    evaluationInterval: '15s'

  ingress:
    enabled: true
    ingressClassName: alb
    hosts: 
      - prometheus.$MyDomain
    paths: 
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

grafana:
  defaultDashboardsTimezone: Asia/Seoul
  adminPassword: prom-operator
  defaultDashboardsEnabled: false

  ingress:
    enabled: true
    ingressClassName: alb
    hosts: 
      - grafana.$MyDomain
    paths: 
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

defaultRules:
  create: false
kubeEtcd:
  enabled: false
alertmanager:
  enabled: false
EOT

## Deploy
helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 51.7.0 \
-f monitor-values.yaml --namespace monitoring

## Access Grafana via its ingress domain ; default account - admin / prom-operator
echo -e "Grafana Web URL = https://grafana.$MyDomain"

# Deploy Metrics-server
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

 

Setup for connecting from your home PC directly to the Stackable ENDPOINTS (worker-node NodePorts) → reachable after adding the security-group rule below

# Allow your home public IP on the worker nodes' '#-nodegroup-ng1-remoteAccess' security group
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr $(curl -s ipinfo.io/ip)/32

 

Installing Stackable (1.0.0-rc3)

- Docs Link Github Customization

 


# Download
#curl -L -o stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc2/stackablectl-x86_64-unknown-linux-gnu
curl -L -o stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc3/stackablectl-x86_64-unknown-linux-gnu
chmod +x stackablectl
mv stackablectl /usr/local/bin

# Verify
stackablectl -h
stackablectl -V
stackablectl 1.0.0-rc3

stackablectl release list
...

# Shell completion
wget https://raw.githubusercontent.com/stackabletech/stackable-cockpit/main/extra/completions/stackablectl.bash
mv stackablectl.bash /etc/bash_completion.d/

# Available operators
stackablectl operator list

# Available stacks
stackablectl stack list

# Available demos : install a Stackable release > set up the stack > load the data
stackablectl demo list

Stackable Cockpit : deploy and manage stacks through a web-based SDP UI, currently in preview - Link

 

 

Demo 1: Trino-Taxi-Data

Install and basic checks : Analysis with a data lake - Link Demo

# The data was put into the S3 storage → Trino enables you to query the data via SQL → Superset was used as a web-based frontend to execute SQL statements and build dashboards.

# Check demo info
stackablectl demo list
stackablectl demo list -o json | jq
stackablectl demo describe trino-taxi-data
 Demo             trino-taxi-data                                                                                              
 Description      Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard 
 Documentation    https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html                                   
 Stackable stack  trino-superset-s3                                                                                            
 Labels           trino, superset, minio, s3, ny-taxi-data

 

 

 

  • Spin up the following data products
    • MinIO: An S3-compatible object store. This demo uses it as persistent storage to store all the data used
    • Hive metastore: A service that stores metadata related to Apache Hive and other services. This demo uses it as metadata storage for Trino - Link
    • Trino: A fast distributed SQL query engine for big data analytics that helps you explore your data universe. This demo uses it to enable SQL access to the data (see the query sketch after this list)
    • Superset: A modern data exploration and visualization platform. This demo utilizes Superset to retrieve data from Trino via SQL queries and build dashboards on top of that data
    • Open Policy Agent (OPA): An open-source, general-purpose policy engine that unifies policy enforcement across the stack. This demo uses it as the authorizer for Trino, which decides which user is able to query which data.
  • Load test data into S3. It contains 2.5 years of New York City taxi trips
  • Make data accessible via SQL in Trino
  • Create Superset dashboards for visualization of the data
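
As a quick taste of that SQL access, here is a minimal sketch of querying Trino from the bastion host with the Trino CLI. Everything here is an assumption rather than part of the demo: pick a CLI version close to the server's, take the coordinator-https address from stackablectl stacklet list (the IP/port below come from the example output further down and will differ in your cluster), and hive.demo.ny_taxi_data is my guess at the catalog/schema/table this demo creates.

# Hypothetical sketch: query Trino from the CLI
curl -L -o trino-cli https://repo1.maven.org/maven2/io/trino/trino-cli/414/trino-cli-414-executable.jar
chmod +x trino-cli

# --insecure because the demo uses a self-signed certificate; --password prompts (adminadmin)
./trino-cli --server https://15.164.129.120:31597 --user admin --password --insecure \
  --execute "SELECT count(*) FROM hive.demo.ny_taxi_data"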

 

 

# [terminal] monitoring
watch -d "kubectl get pod -n stackable-operators;echo;kubectl get pod,job,svc,pvc"

# Install the demo : takes about 8 minutes, including the dataset download job
stackablectl demo install trino-taxi-data

# Verify the installation
helm list -n stackable-operators
helm list

kubectl top node
kubectl top pod -A

kubectl get-all -n default
kubectl get deploy,sts,pod
kubectl get job
kubectl get job load-ny-taxi-data -o yaml | kubectl neat | cat -l yaml
kubectl get job create-ny-taxi-data-table-in-trino -o yaml | kubectl neat | cat -l yaml
kubectl get job setup-superset -o yaml | kubectl neat | cat -l yaml
kubectl get job superset -o yaml | kubectl neat | cat -l yaml

kubectl get sc,pvc,pv
kubectl get pv | grep gp3
kubectl get sc secrets.stackable.tech -o yaml | kubectl neat | cat -l yaml
kubectl df-pv
kubectl get svc,ep,endpointslices
kubectl get cm,secret

kubectl get cm minio -o yaml | kubectl neat | cat -l yaml
kubectl describe cm minio

kubectl get cm hive-metastore-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm hive -o yaml | kubectl neat | cat -l yaml
kubectl get cm postgresql-hive-extended-configuration -o yaml | kubectl neat | cat -l yaml

kubectl get cm trino-coordinator-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-coordinator-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm create-ny-taxi-data-table-in-trino-script -o yaml | kubectl neat | cat -l yaml

kubectl get cm superset-node-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm superset-init-db -o yaml | kubectl neat | cat -l yaml
kubectl get cm setup-superset-script -o yaml | kubectl neat | cat -l yaml

kubectl get secret minio -o yaml | kubectl neat | cat -l yaml
kubectl get secret minio-s3-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret postgresql-hive -o yaml | kubectl neat | cat -l yaml
kubectl get secret postgresql-superset -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-users -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-internal-secret -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-mapbox-api-key -o yaml | kubectl neat | cat -l yaml

kubectl get crd | grep stackable
kubectl explain trinoclusters
kubectl describe trinoclusters.trino.stackable.tech

kubectl get hivecluster,opacluster,s3connection
kubectl get supersetcluster,supersetdb
kubectl get trinocluster,trinocatalog

kubectl get hivecluster -o yaml | kubectl neat | cat -l yaml
kubectl get s3connection -o yaml | kubectl neat | cat -l yaml
kubectl get supersetcluster -o yaml | kubectl neat | cat -l yaml
kubectl get supersetdb -o yaml | kubectl neat | cat -l yaml
kubectl get trinocluster -o yaml | kubectl neat | cat -l yaml
kubectl get trinocatalog -o yaml | kubectl neat | cat -l yaml


# Check the deployed stack info : don't check right away; verify below after the install completes - Endpoints (access addresses), Conditions (status)
stackablectl stacklet list
┌────────────┬──────────────────────────────────┬────────────┬──────────────────────────────────────────────────┬─────────────────────────────────┐
│ PRODUCT    ┆ NAME                             ┆ NAMESPACE  ┆ ENDPOINTS                                        ┆ CONDITIONS                      │
╞════════════╪══════════════════════════════════╪════════════╪══════════════════════════════════════════════════╪═════════════════════════════════╡
│ hive       ┆ hive                             ┆ default    ┆                                                  ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa        ┆ opa                              ┆ default    ┆                                                  ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset   ┆ superset                         ┆ default    ┆ external-superset   http://43.202.112.25:31493   ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino      ┆ trino                            ┆ default    ┆ coordinator-metrics 15.164.129.120:30531         ┆ Available, Reconciling, Running │
│            ┆                                  ┆            ┆ coordinator-https   https://15.164.129.120:31597 ┆                                 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ grafana    ┆ kube-prometheus-stack-grafana    ┆ monitoring ┆                                                  ┆                                 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ minio      ┆ minio-console                    ┆ default    ┆ http                http://3.35.25.225:30697     ┆                                 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ prometheus ┆ kube-prometheus-stack-prometheus ┆ monitoring ┆                                                  ┆                                 │
└────────────┴──────────────────────────────────┴────────────┴──────────────────────────────────────────────────┴─────────────────────────────────┘

# Check product login credentials for the deployed stack : most use admin / adminadmin
stackablectl stacklet credentials superset superset
stackablectl stacklet credentials minio minio-console  # does not print the admin / adminadmin credentials... presumably because this is still an rc build

# Check deployed operators
stackablectl operator installed
┌───────────────────┬─────────┬─────────────────────┬──────────┬─────────────────────────────────────────┐
│ OPERATOR          ┆ VERSION ┆ NAMESPACE           ┆ STATUS   ┆ LAST UPDATED                            │
╞═══════════════════╪═════════╪═════════════════════╪══════════╪═════════════════════════════════════════╡
│ commons-operator  ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:37:56.08217875 +0900 KST  │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive-operator     ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:13.358512684 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa-operator      ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:32.724016087 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret-operator   ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:51.410402351 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset-operator ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:56.963602496 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino-operator    ┆ 23.7.0  ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:39:15.346593878 +0900 KST │
└───────────────────┴─────────┴─────────────────────┴──────────┴─────────────────────────────────────────┘

If you skipped this in the basic setup commands : to connect from your home PC directly to the ENDPOINTS (worker-node NodePorts) → reachable after adding the security-group rule below

# Allow your home public IP on the worker nodes' '#-nodegroup-ng1-remoteAccess' security group
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr $(curl -s ipinfo.io/ip)/32

 

 

(Reference/optional) Accessing via ingress
 
# Create the manifest : MinIO example
cat <<EOT > minio-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata: 
  annotations: 
    alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
    alb.ingress.kubernetes.io/group.name: study
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
    alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/ssl-redirect: "443"
    alb.ingress.kubernetes.io/success-codes: 200-399
    alb.ingress.kubernetes.io/target-type: ip
  labels: 
    app: minio
  name: minio
spec: 
  ingressClassName: alb
  rules: 
  - host: minio.$MyDomain
    http: 
      paths: 
      - backend: 
          service: 
            name: minio-console
            port: 
              number: 9001
        path: /*
        pathType: ImplementationSpecific
EOT

# Create the ingress
kubectl apply -f minio-ingress.yaml

# Verify
kubectl get ingress -A
echo -e "minio URL = https://minio.$MyDomain"

 

 

Inspect data in S3 - Docs MinIO MinIO_Operator

  • Open the PRODUCT minio ENDPOINTS console-http address and log in : admin / adminadmin
    • Click on the blue button Browse on the bucket demo and open the folders ny-taxi-data → raw
  • As you can see, the demo uploaded 1GB of parquet files, one file per month. The data contains taxi rides in New York City
  • The demo loaded 2.5 years of taxi trip data from New York City with 68 million records and a total size of 1GB in parquet files.
    • Fields : pickup and drop-off dates/times, pickup and drop-off locations, trip distances, itemized fares, rate types, payment types, and driver-reported passenger counts - Link

You can see the file size (and therefore the number of rides) decrease drastically because of the Covid-19 pandemic starting from 2020-03.

  • Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
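
If you prefer a CLI over the console UI, here is a rough sketch of browsing the bucket with the MinIO client instead. It assumes mc is installed on the bastion; the service name and credential keys are taken from the load job shown below.

# Hypothetical sketch: list the demo bucket with mc over a local port-forward
kubectl port-forward svc/minio 9000:9000 &

ACCESS_KEY=$(kubectl get secret minio-s3-credentials -o jsonpath='{.data.accessKey}' | base64 -d)
SECRET_KEY=$(kubectl get secret minio-s3-credentials -o jsonpath='{.data.secretKey}' | base64 -d)

mc alias set demo-minio http://localhost:9000 "$ACCESS_KEY" "$SECRET_KEY"
mc ls demo-minio/demo/ny-taxi-data/raw/    # one yellow_tripdata_*.parquet per month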
#
kubectl get svc,ep minio-console

# Inspect how the dataset download job works
kubectl get job load-ny-taxi-data -o yaml | kubectl neat | cat -l yaml -p
...
spec:
      containers:
      - command:
        - bash
        - -c
        - cd /tmp && for month in 2020-01 2020-02 2020-03 2020-04 2020-05 2020-06
          2020-07 2020-08 2020-09 2020-10 2020-11 2020-12 2021-01 2021-02 2021-03
          2021-04 2021-05 2021-06 2021-07 2021-08 2021-09 2021-10 2021-11 2021-12
          2022-01 2022-02 2022-03 2022-04; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet
          && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey)
          $(cat /minio-s3-credentials/secretKey) && mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/; done
...

## Sample download : change only the 'year-month' to fetch a different file
curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_2023-01.parquet

# More details
kubectl get cm minio -o yaml | kubectl neat | cat -l yaml
kubectl describe cm minio

kubectl get secret minio -o yaml | kubectl neat | cat -l yaml
kubectl get secret minio-s3-credentials -o yaml | kubectl neat | cat -l yaml
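
To peek inside the sample file downloaded above without going through Trino, here is a small sketch using pyarrow; it assumes python3/pip3 are available on the bastion (pyarrow is not part of the one-click setup).

# Hypothetical sketch: inspect the parquet footer locally
pip3 install pyarrow

python3 - <<'PYEOF'
import pyarrow.parquet as pq

pf = pq.ParquetFile("yellow_tripdata_2023-01.parquet")
print(pf.metadata)        # row count, row groups, compressed sizes
print(pf.schema_arrow)    # column names and types (tpep_pickup_datetime, ...)
PYEOF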

 

 

  • Use Trino Web Interface - Docs Trino Concept
    • Trino, a query engine that runs at ludicrous speed : Fast distributed SQL query engine for big data analytics - Link
  • Trino offers SQL access to the data within S3
  • Open the PRODUCT trino ENDPOINTS coordinator-https address and log in : admin / adminadmin
  • When you start executing SQL queries you will see them getting processed here; under the hood the web UI drives Trino's statement REST API, sketched below.
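
A minimal curl sketch of that statement REST API follows; the host/port come from the example coordinator-https endpoint above and will differ in your cluster, and -k skips verification of the demo's self-signed certificate.

# Hypothetical sketch: submit a query via Trino's REST API
TRINO=https://15.164.129.120:31597

# POST the SQL; the JSON response carries a nextUri to poll for results
curl -k -s -u admin:adminadmin -X POST "$TRINO/v1/statement" \
  -d 'select count(*) from hive.demo.ny_taxi_data' | jq

# Keep GET-ing the returned nextUri until a "data" field appears
curl -k -s -u admin:adminadmin '<nextUri from the previous response>' | jq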
     
     
#
kubectl get svc,ep trino-coordinator

#
kubectl get job create-ny-taxi-data-table-in-trino -o yaml | kubectl neat | cat -l yaml

kubectl get trinocluster,trinocatalog
kubectl get trinocluster -o yaml | kubectl neat | cat -l yaml -p
kubectl get trinocatalog -o yaml | kubectl neat | cat -l yaml -p
...
spec:
    connector:      # hive, s3
      hive:
        metastore:
          configMap: hive
        s3:
          reference: minio
...

#
kubectl get cm trino-coordinator-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-coordinator-default-catalog -o yaml | kubectl neat | cat -l yaml -p
...
data:
  hive.properties: |
    connector.name=hive
    hive.metastore.uri=thrift\://hive-metastore-default-0.hive-metastore-default.default.svc.cluster.local\:9083
    hive.s3.aws-access-key=${ENV\:CATALOG_HIVE_HIVE_S3_AWS_ACCESS_KEY}
    hive.s3.aws-secret-key=${ENV\:CATALOG_HIVE_HIVE_S3_AWS_SECRET_KEY}
    hive.s3.endpoint=http\://minio\:9000
    hive.s3.path-style-access=true
    hive.s3.ssl.enabled=false
    hive.security=allow-all
...

kubectl get cm trino-worker-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm create-ny-taxi-data-table-in-trino-script -o yaml | kubectl neat | cat -l yaml

#
kubectl get secret trino-users -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-internal-secret -o yaml | kubectl neat | cat -l yaml

 

 

Scale out to two Trino workers via the trino-operator
# Tail the trino-operator logs while scaling
kubectl logs -n stackable-operators -l app.kubernetes.io/instance=trino-operator -f

# Scale the Trino workers to 2
kubectl get trinocluster trino -o json | cat -l json -p
kubectl patch trinocluster trino --type='json' -p='[{"op": "replace", "path": "/spec/workers/roleGroups/default/replicas", "value":2}]'
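
To watch the scale-out land, a quick check; this assumes the worker pods carry the app.kubernetes.io/instance=trino label, consistent with the other stacklets in this post.

# A second trino-worker-default pod should appear
kubectl get pod -l app.kubernetes.io/instance=trino -w

# The coordinator web UI's cluster overview should now report 2 active workers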

 

 

Scale back down to one Trino worker

kubectl patch trinocluster trino --type='json' -p='[{"op": "replace", "path": "/spec/workers/roleGroups/default/replicas", "value":1}]'

 

 

 

Use Superset Web Interface - Docs Superset Connect_Trino

  • Apache Superset™ is an open-source modern data exploration and visualization platform
  • Superset gives the ability to execute SQL queries and build dashboards
  • Open the PRODUCT superset ENDPOINTS external-superset address and log in : admin / adminadmin
  • On the top click on the tab Dashboards → Click on the dashboard called Taxi data.
    • It might take some time until the dashboard renders all the included charts. ⇒ may take a while (refresh); watch the queries run in Trino alongside
# You can clearly see the impact of Covid-19 on the taxi business → rides drop sharply from March 2020 onward

#
kubectl get svc,ep superset-external

#
kubectl get job setup-superset -o yaml | kubectl neat | cat -l yaml
kubectl get job superset -o yaml | kubectl neat | cat -l yaml

kubectl get supersetcluster,supersetdb
kubectl get supersetcluster -o yaml | kubectl neat | cat -l yaml -p
kubectl get supersetdb -o yaml | kubectl neat | cat -l yaml -p

#
kubectl get cm superset-init-db -o yaml | kubectl neat | cat -l yaml -p
kubectl get cm superset-node-default -o yaml | kubectl neat | cat -l yaml -p
kubectl get cm setup-superset-script -o yaml | kubectl neat | cat -l yaml -p

#
kubectl get secret superset-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-mapbox-api-key -o yaml | kubectl neat | cat -l yaml

Execute arbitrary SQL statements

  • Within Superset you can not only create dashboards but also run arbitrary SQL statements. On the top click on the tab SQL Lab → SQL Editor.
  • On the left select the database Trino, the schema demo and set See table schema to ny_taxi_data.
select
  format_datetime(tpep_pickup_datetime, 'YYYY/MM') as month,
  count(*) as trips,
  sum(total_amount) as sales,
  avg(duration_min) as avg_duration_min
from ny_taxi_data
group by 1
order by 1

 

# How many taxi trips were there in the year 2021?

select
  count(*) as trips
from ny_taxi_data
where year(tpep_pickup_datetime) = 2021


# What was the maximum number of passengers?

select
  max(passenger_count) as max_passenger_count
from ny_taxi_data;

# Returns 112 passengers. Well, that's weird. Let's examine the passenger distribution.
select
  passenger_count,
  count(*) as frequency
from ny_taxi_data
group by 1
order by 1 desc
limit 100

# What was the highest tip (measured in percentage of the original fee) ever given?

select
  total_amount as fee,
  tip_amount as tip,
  tip_amount / total_amount * 100 as tip_percentage
from ny_taxi_data
where total_amount > 0
order by 3 desc
limit 5

(Optional/reference) Where to go from here - Link

 

Delete the demo

#
kubectl delete supersetcluster,supersetdb superset
kubectl delete trinocluster trino && kubectl delete trinocatalog hive
kubectl delete hivecluster hive
kubectl delete s3connection minio
kubectl delete opacluster opa

#
helm uninstall postgresql-superset
helm uninstall postgresql-hive
helm uninstall minio

#
kubectl delete job --all
kubectl delete pvc --all

#
kubectl delete cm create-ny-taxi-data-table-in-trino-script setup-superset-script trino-opa-bundle
kubectl delete secret minio-s3-credentials secret-provisioner-tls-ca superset-credentials superset-mapbox-api-key trino-users
kubectl delete sa superset-sa

# Remove the operators
stackablectl operator uninstall superset trino hive secret opa commons

# Check for leftover resources
kubectl get-all -n stackable-operators

Getting Started


 

Overview : Stackable is based on Kubernetes and uses it as the control plane to manage clusters.

  • In this guide we will build a simple cluster with 3 services: Apache ZooKeeper, Apache Kafka, and Apache NiFi.
  • Installing Stackable Operators - Link
# [terminal 1] monitoring
watch -d "kubectl get pod -n stackable-operators"

# [terminal 2] install
stackablectl release list
stackablectl release install -i commons -i secret -i zookeeper -i kafka -i nifi 23.7
[INFO ] Installing release 23.7
[INFO ] Installing commons operator in version 23.7.0
[INFO ] Installing kafka operator in version 23.7.0
[INFO ] Installing nifi operator in version 23.7.0
[INFO ] Installing secret operator in version 23.7.0
[INFO ] Installing zookeeper operator in version 23.7.0

# Verify the installation
helm list -n stackable-operators
stackablectl operator installed
kubectl get crd | grep stackable.tech
kubectl get pod

 

 

 

Deploying Stackable Services - Link

# Monitoring
watch -d kubectl get pod,job,svc,pvc

# Apache ZooKeeper
kubectl apply -f - <<EOF
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperCluster
metadata:
  name: simple-zk
spec:
  image:
    productVersion: "3.8.1"
    stackableVersion: "23.7"
  clusterConfig:
    tls:
      serverSecretClass: null
  servers:
    roleGroups:
      primary:
        replicas: 1
        config:
          myidOffset: 10
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
  name: simple-zk-znode
spec:
  clusterRef:
    name: simple-zk
EOF


# Verify the installation
kubectl get zookeepercluster,zookeeperznode
kubectl get pod,svc,ep,pvc -l app.kubernetes.io/instance=simple-zk
kubectl describe pod -l app.kubernetes.io/instance=simple-zk

# Tail the logs
kubectl logs -l app.kubernetes.io/instance=simple-zk -c zookeeper -f

 

 

  • Apache Kafka : We will deploy an Apache Kafka broker that depends on the ZooKeeper service we just deployed.
  • The zookeeperReference property below points to the namespace and name we gave to the ZooKeeper service deployed previously.
kubectl apply -f - <<EOF
---
apiVersion: kafka.stackable.tech/v1alpha1
kind: KafkaCluster
metadata:
  name: simple-kafka
spec:
  image:
    productVersion: "3.4.0"
    stackableVersion: "23.7"
  clusterConfig:
    zookeeperConfigMapName: simple-kafka-znode
    tls:
      serverSecretClass: null
  brokers:
    roleGroups:
      brokers:
        replicas: 3
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
  name: simple-kafka-znode
spec:
  clusterRef:
    name: simple-zk
    namespace: default
EOF

# Verify the installation
kubectl get kafkacluster,zookeeperznode
kubectl get pod,svc,ep,pvc -l app.kubernetes.io/instance=simple-kafka
kubectl describe pod -l app.kubernetes.io/instance=simple-kafka

# Tail the logs
kubectl logs -l app.kubernetes.io/instance=simple-kafka -c kafka -f

 

 

Kafka UI - Link

#
helm repo add kafka-ui https://provectus.github.io/kafka-ui-charts
cat <<EOF > kafkaui-values.yml
yamlApplicationConfig:
  kafka:
    clusters:
      - name: yaml
        bootstrapServers: simple-kafka-broker-brokers:9092
  auth:
    type: disabled
  management:
    health:
      ldap:
        enabled: false
EOF

# Install
helm install kafka-ui kafka-ui/kafka-ui -f kafkaui-values.yml

# Verify access
kubectl patch svc kafka-ui -p '{"spec":{"type":"LoadBalancer"}}'
kubectl annotate service kafka-ui "external-dns.alpha.kubernetes.io/hostname=kafka-ui.$MyDomain"
echo -e "kafka-ui Web URL = http://kafka-ui.$MyDomain"

Apache NiFi - Link Intro

kubectl apply -f - <<EOF
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
  name: simple-nifi-znode
spec:
  clusterRef:
    name: simple-zk
---
apiVersion: v1
kind: Secret
metadata:
  name: nifi-admin-credentials-simple
stringData:
  username: admin
  password: AdminPassword
---
apiVersion: nifi.stackable.tech/v1alpha1
kind: NifiCluster
metadata:
  name: simple-nifi
spec:
  image:
    productVersion: "1.21.0"
    stackableVersion: "23.7"
  clusterConfig:
    listenerClass: external-unstable
    zookeeperConfigMapName: simple-nifi-znode
    authentication:
      method:
        singleUser:
          adminCredentialsSecret: nifi-admin-credentials-simple
    sensitiveProperties:
      keySecret: nifi-sensitive-property-key
      autoGenerate: true
  nodes:
    roleGroups:
      default:
        replicas: 1
EOF



# Verify the installation : the job takes a while to complete
kubectl get nificluster,zookeeperznode
kubectl get pod,svc,ep,pvc,job -l app.kubernetes.io/instance=simple-nifi
kubectl describe job.batch/simple-nifi-create-reporting-task-1-21-0

kubectl describe pod -l app.kubernetes.io/instance=simple-nifi
...
Args:
      /stackable/python/create_nifi_reporting_task.py -n https://simple-nifi.default.svc.cluster.local:8443/nifi-api -u "$(cat /stackable/adminuser/username | grep -oP '((cn|dn|uid)=\K[^,]+|.*)' | head -n 1)" -p "$(cat /stackable/adminuser/password)" -v 1.21.0 -m 8081 -c /stackable/cert/ca.crt
...

# Tail the logs
kubectl logs -l app.kubernetes.io/instance=simple-nifi -c nifi -f

 

Testing your cluster - Link

# Verify the installation
stackablectl stacklet list
┌────────────┬──────────────────────────────────┬────────────┬─────────────────────────────────────┬─────────────────────────────────┐
│ PRODUCT    ┆ NAME                             ┆ NAMESPACE  ┆ ENDPOINTS                           ┆ CONDITIONS                      │
╞════════════╪══════════════════════════════════╪════════════╪═════════════════════════════════════╪═════════════════════════════════╡
│ kafka      ┆ simple-kafka                     ┆ default    ┆ metrics 3.35.25.225:32611           ┆ Available, Reconciling, Running │
│            ┆                                  ┆            ┆ kafka   3.35.25.225:32283           ┆                                 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi       ┆ simple-nifi                      ┆ default    ┆ https   https://43.202.112.25:32669 ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper  ┆ simple-zk                        ┆ default    ┆                                     ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ grafana    ┆ kube-prometheus-stack-grafana    ┆ monitoring ┆                                     ┆                                 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ prometheus ┆ kube-prometheus-stack-prometheus ┆ monitoring ┆                                     ┆                                 │
└────────────┴──────────────────────────────────┴────────────┴─────────────────────────────────────┴─────────────────────────────────┘

# Apache ZooKeeper
# ZooKeeper CLI shell
kubectl exec -i -t simple-zk-server-primary-0 -c zookeeper -- bin/zkCli.sh
------------------
# Check the znodes
# You can run the ls / command to see the list of znodes in the root path, 
# which should include those created by Apache Kafka and Apache NiFi.
ls /
[znode-5fef78a9-71e1-4250-bc35-ced60243d60f, znode-b0bf14f8-a1f6-4b31-aaba-4f4bbc68767d, znode-c45d9efd-a071-4723-943a-79d5fe49a162, zookeeper]

quit
------------------



# Apache Kafka
# Create a topic
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --topic demo
...
Created topic demo.
...

# List topics
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bin/kafka-topics.sh --bootstrap-server localhost:9092 --list
...
demo
...
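
To close the loop, here is a sketch of a produce/consume round trip on the demo topic, using the console clients shipped next to kafka-topics.sh in the broker image.

# Produce one message
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bash -c \
  'echo "hello stackable" | bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic demo'

# Consume it back; exits after one message
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bin/kafka-console-consumer.sh \
  --bootstrap-server localhost:9092 --topic demo --from-beginning --max-messages 1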

 

 

Apache NiFi : open the PRODUCT nifi ENDPOINTS https address and log in : admin / AdminPassword

# Check the NiFi admin account password
kubectl get secrets nifi-admin-credentials-simple -o jsonpath="{.data.password}" | base64 -d && echo
AdminPassword

 

 

Delete the deployed resources

# Delete Apache NiFi
kubectl delete nificluster simple-nifi && kubectl delete zookeeperznode simple-nifi-znode

# Delete kafka-ui
helm uninstall kafka-ui

# Delete Apache Kafka
kubectl delete kafkacluster simple-kafka && kubectl delete zookeeperznode simple-kafka-znode

# Delete Apache ZooKeeper
kubectl delete zookeepercluster simple-zk && kubectl delete zookeeperznode simple-zk-znode

# Delete secrets and PVCs
kubectl delete secret nifi-admin-credentials-simple nifi-sensitive-property-key secret-provisioner-tls-ca
kubectl delete pvc --all

# Remove the operators
stackablectl operator uninstall nifi kafka zookeeper secret commons

# Check for leftover resources
kubectl get-all -n stackable-operators

 

Stackable Operator

Operator : Operators manage the individual data products of the Stackable Data Platform - Link

# list
stackablectl operator list
┌───────────┬────────────────────────────────────────────────────────────────────────────────────────┐
│ Operator  ┆ Stable versions                                                                        │
╞═══════════╪════════════════════════════════════════════════════════════════════════════════════════╡
│ airflow   ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0               │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ commons   ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.4.0, 0.3.0, 0.2.1, 0.2.0, 0.1.0                      │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ druid     ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hbase     ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0                             │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hdfs      ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0                             │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive      ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.3.0                      │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ kafka     ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0                      │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ listener  ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0                                                         │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi      ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.1, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0               │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa       ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.11.0, 0.10.0, 0.9.0, 0.8.0, 0.7.0, 0.6.0             │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret    ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0               │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ spark-k8s ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0               │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset  ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0        │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino     ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.1, 0.3.0, 0.2.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.12.0, 0.11.0, 0.10.0, 0.9.0, 0.8.0, 0.7.0, 0.6.0     │
└───────────┴────────────────────────────────────────────────────────────────────────────────────────┘

# Describe an individual operator
stackablectl operator describe airflow
 Operator         airflow                                                                                                 
 Stable versions  23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0                                
 Test versions    0.0.0-pr317, 0.0.0-pr316, 0.0.0-pr315, 0.0.0-pr314, 0.0.0-pr312, 0.0.0-pr311, 0.0.0-pr310, 0.0.0-pr308,  0.0.0-pr307, 0.0.0-pr305, 0.0.0-pr304, 0.0.0-pr303                                                      
 Dev versions     0.0.0-dev
 
# Install operator
# [terminal 1] monitoring
watch -d kubectl get pod,job,svc,pvc

# [terminal 2] install
stackablectl operator install airflow commons secret
[INFO ] Installing airflow operator
[INFO ] Installing commons operator
[INFO ] Installing secret operator

# Verify
stackablectl operator installed
OPERATOR              VERSION         NAMESPACE                      STATUS           LAST UPDATED
airflow               0.0.0-dev       default                        deployed         2023-08-24 08:35:44.418557107 +0000 UTC
commons               0.0.0-dev       default                        deployed         2023-08-24 08:36:07.429509054 +0000 UTC
secret                0.0.0-dev       default                        deployed         2023-08-24 08:36:30.002822765 +0000 UTC

kubectl get crd | grep stackable.tech
kubectl get pod
NAME                                           READY   STATUS    RESTARTS   AGE
airflow-operator-deployment-7cd445fd69-mpwbb   1/1     Running   0          3m16s
commons-operator-deployment-577f88c697-478f6   1/1     Running   0          2m53s
secret-operator-daemonset-9djvc                3/3     Running   0          2m30s
secret-operator-daemonset-9x7np                3/3     Running   0          2m30s
secret-operator-daemonset-gsm8j                3/3     Running   0          2m30s


# Uninstall operator
stackablectl operator uninstall airflow commons secret
stackablectl operator installed

Release : A release is a bundle of operators of a specific stable version

  • The stable versions of the operators are tested and proven to work hand in hand. If you want to install a single individual operator, have a look at the Operator command.
#
stackablectl release list
stackablectl release describe 23.7
 Release            23.7                                                  
 Release date       2023-07-26                                            
 Description        Sixth release focusing on resources and pod overrides 
 Included products                                                        
                                                                          
┌───────────┬──────────────────┐
│ Product   ┆ Operator version │
╞═══════════╪══════════════════╡
│ airflow   ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ commons   ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ druid     ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hbase     ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hdfs      ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive      ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ kafka     ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ listener  ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi      ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa       ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret    ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ spark-k8s ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset  ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino     ┆ 23.7.0           │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper ┆ 23.7.0           │
└───────────┴──────────────────┘