목차
1. 소개
2. 설치
3. Demo1: Trino-Taxi-Data
4. Getting Started
Stackable Data Platform (SDP) : 오픈소스 데이터 관련 다양한 애플리케이션(Apache Spark, Apache Superset, Apache HBase, Apache Airflow 등)을 쿠버네티스 환경에 손쉽게 배포하고 관리할 수 있다.
- 데이터 관련 제공 오퍼레이터 - Link
- Airflow, Druid, HBase, Hadoop HDFS, Hive, Kafka, NiFi, Spark, Superset, Trino, ZooKeeper
EKS 원클릭 배포
v1.26 - CloudFormation_Link
- myeks-bastion-EC2 를 기존 AL2 대신 → Ubuntu 22.04 : 접속 시 ubuntu 사용 - 워커 노드 : 인스턴스 타입 c5.2xlarge , 툴 설치 batcat
# YAML 파일 다운로드
curl -O https://s3.ap-northeast-2.amazonaws.com/cloudformation.cloudneta.net/EKS/eks-oneclick-2.yaml
# CloudFormation 스택 배포
# aws cloudformation deploy --template-file eks-oneclick-2.yaml --stack-name myeks --parameter-overrides KeyName=<My SSH Keyname> SgIngressSshCidr=<My Home Public IP Address>/32 MyIamUserAccessKeyID=<IAM User의 액세스키> MyIamUserSecretAccessKey=<IAM User의 시크릿 키> ClusterBaseName='<eks 이름>' --region ap-northeast-2
예시) aws cloudformation deploy --template-file eks-oneclick-2.yaml --stack-name myeks --parameter-overrides KeyName=kp-gasida SgIngressSshCidr=$(curl -s ipinfo.io/ip)/32 MyIamUserAccessKeyID=AKIA5... MyIamUserSecretAccessKey='CVNa2...' ClusterBaseName=myeks --region ap-northeast-2
# CloudFormation 스택 배포 완료 후 작업용 EC2 IP 출력
aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text
# 작업용 EC2 SSH 접속
ssh -i ~/.ssh/kp-gasida.pem ubuntu@$(aws cloudformation describe-stacks --stack-name myeks --query 'Stacks[*].Outputs[0].OutputValue' --output text)
-----------------
# batcat
## echo "alias cat='batcat --paging=never'" 해당 내용을 /etc/profile에 추가해둠
cat precmd.yaml
cat precmd.yaml -p
cat precmd.yaml -n
cat /var/log/syslog
batcat -h
batcat -L
# 정보 확인 : v1.26, c5.2xlarge
kubectl get node --label-columns=node.kubernetes.io/instance-type
NAME STATUS ROLES AGE VERSION INSTANCE-TYPE
ip-192-168-1-37.ap-northeast-2.compute.internal Ready <none> 42m v1.26.10-eks-e71965b c5.2xlarge
ip-192-168-2-253.ap-northeast-2.compute.internal Ready <none> 42m v1.26.10-eks-e71965b c5.2xlarge
ip-192-168-3-207.ap-northeast-2.compute.internal Ready <none> 42m v1.26.10-eks-e71965b c5.2xlarge
기본 작업 cmd
# 노드 PrivateIP 변수 지정
N1=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2a -o jsonpath={.items[0].status.addresses[0].address})
N2=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2b -o jsonpath={.items[0].status.addresses[0].address})
N3=$(kubectl get node --label-columns=topology.kubernetes.io/zone --selector=topology.kubernetes.io/zone=ap-northeast-2c -o jsonpath={.items[0].status.addresses[0].address})
echo "export N1=$N1" >> /etc/profile
echo "export N2=$N2" >> /etc/profile
echo "export N3=$N3" >> /etc/profile
echo $N1, $N2, $N3
# 노드 보안그룹에 eksctl-host 에서 노드(파드)에 접속 가능하게 룰(Rule) 추가 설정
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32
# AWS LoadBalancer Controller
helm repo add eks https://aws.github.io/eks-charts
helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME \
--set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller
# ExternalDNS 컨트롤러 설치
MyDomain=<자신의 도메인>
echo "export MyDomain=<자신의 도메인>" >> /etc/profile
MyDomain=gasida.link
echo "export MyDomain=gasida.link" >> /etc/profile
MyDnsHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)
echo $MyDomain, $MyDnsHostedZoneId
curl -s -O https://raw.githubusercontent.com/cloudneta/cnaeblab/master/_data/externaldns.yaml
MyDomain=$MyDomain MyDnsHostedZoneId=$MyDnsHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -
# kube-ops-view
helm repo add geek-cookbook https://geek-cookbook.github.io/charts/
helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system
kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'
kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"
echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=4.0"
# ebs gp3 스토리지 클래스 생성 : 파일시스템 xfs
kubectl patch sc gp2 -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'
kubectl apply -f https://raw.githubusercontent.com/gasida/DOIK/main/1/gp3-sc-xfs.yaml
# efs 스토리지 클래스 생성 : 실습 편리를 위해서 삭제 정책은 Delete(기본값)을 사용
cat <<EOT > efs-sc.yaml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: efs-sc
provisioner: efs.csi.aws.com
parameters:
provisioningMode: efs-ap
fileSystemId: $EFS_ID
directoryPerms: "700"
EOT
kubectl apply -f efs-sc.yaml
# 프로메테우스-스택 생성
kubectl create ns monitoring
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`
## 파라미터 파일 생성
cat <<EOT > monitor-values.yaml
prometheus:
prometheusSpec:
podMonitorSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
#probeSelectorNilUsesHelmValues: false
retention: 5d
retentionSize: "10GiB"
scrapeInterval: '15s'
evaluationInterval: '15s'
ingress:
enabled: true
ingressClassName: alb
hosts:
- prometheus.$MyDomain
paths:
- /*
annotations:
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/target-type: ip
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
alb.ingress.kubernetes.io/success-codes: 200-399
alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
alb.ingress.kubernetes.io/group.name: study
alb.ingress.kubernetes.io/ssl-redirect: '443'
grafana:
defaultDashboardsTimezone: Asia/Seoul
adminPassword: prom-operator
defaultDashboardsEnabled: false
ingress:
enabled: true
ingressClassName: alb
hosts:
- grafana.$MyDomain
paths:
- /*
annotations:
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/target-type: ip
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
alb.ingress.kubernetes.io/success-codes: 200-399
alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
alb.ingress.kubernetes.io/group.name: study
alb.ingress.kubernetes.io/ssl-redirect: '443'
defaultRules:
create: false
kubeEtcd:
enabled: false
alertmanager:
enabled: false
EOT
## 배포
helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 51.7.0 \
-f monitor-values.yaml --namespace monitoring
## 그라파나 ingress 도메인으로 웹 접속 : 기본 계정 - admin / prom-operator
echo -e "Grafana Web URL = https://grafana.$MyDomain"
# Metrics-server 배포
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
집 PC에서 직접 Stackable ENDPOINTS(워커 노드의 NodePort)로 접속을 위한 설정 → 아래 보안 그룹 추가 후 접속 가능
# 워커노드의 '#-nodegroup-ng1-remoteAccess' 보안 그룹에 자신의 집 공인IP 접속 허용 추가
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr $(curl -s ipinfo.io/ip)/32
Stackable 설치(1.0.0-rc3)
- Docs Link Github Customization
# 다운로드
#curl -L -o stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc2/stackablectl-x86_64-unknown-linux-gnu
curl -L -o stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc3/stackablectl-x86_64-unknown-linux-gnu
chmod +x stackablectl
mv stackablectl /usr/local/bin
# 확인
stackablectl -h
stackablectl -V
stackablectl 1.0.0-rc3
stackablectl release list
...
# 자동완성
wget https://raw.githubusercontent.com/stackabletech/stackable-cockpit/main/extra/completions/stackablectl.bash
mv stackablectl.bash /etc/bash_completion.d/
# 제공 오퍼레이터
stackablectl operator list
# 제공 스택
stackablectl stack list
# 제공 데모 : Stackable release 설치 > 스택 구성 > 데이터 구성
stackablectl demo list
Stackable Cockpit : 웹 기반 SDP를 통해 스택을 배포 및 관리, 현재 preview - Link
Demo1: Trino-Taxi-Data
설치 및 기본 확인 : Analysis with a data lake - Link Demo
# The data was put into the S3 storage → Trino enables you to query the data via SQL → Superset was used as a web-based frontend to execute SQL statements and build dashboards.
# Demo 정보 확인
stackablectl demo list
stackablectl demo list -o json | jq
stackablectl demo describe trino-taxi-data
Demo trino-taxi-data
Description Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard
Documentation https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html
Stackable stack trino-superset-s3
Labels trino, superset, minio, s3, ny-taxi-data
- Spin up the following data products
- MinIO: A S3 compatible object store. This demo uses it as persistent storage to store all the data used
- Hive metastore: A service that stores metadata related to Apache Hive and other services. This demo uses it as metadata storage for Trino - Link
- Trino: A fast distributed SQL query engine for big data analytics that helps you explore your data universe. This demo uses it to enable SQL access to the data
- Superset: A modern data exploration and visualization platform. This demo utilizes Superset to retrieve data from Trino via SQL queries and build dashboards on top of that data
- Open policy agent (OPA): An open source, general-purpose policy engine that unifies policy enforcement across the stack. This demo uses it as the authorizer for Trino, which decides which user is able to query which data.
- Load testdata into S3. It contains 2.5 years of New York City taxi trips
- Make data accessible via SQL in Trino
- Create Superset dashboards for visualization of the data
# [터미널] 모니터링
watch -d "kubectl get pod -n stackable-operators;echo;kubectl get pod,job,svc,pvc"
# 데모 설치 : 데이터셋 다운로드 job 포함 8분 정도 소요
stackablectl demo install trino-taxi-data
# 설치 확인
helm list -n stackable-operators
helm list
kubectl top node
kubectl top pod -A
kubectl get-all -n default
kubectl get deploy,sts,pod
kubectl get job
kubectl get job load-ny-taxi-data -o yaml | kubectl neat | cat -l yaml
kubectl get job create-ny-taxi-data-table-in-trino -o yaml | kubectl neat | cat -l yaml
kubectl get job setup-superset -o yaml | kubectl neat | cat -l yaml
kubectl get job superset -o yaml | kubectl neat | cat -l yaml
kubectl get sc,pvc,pv
kubectl get pv |grep gp3
kubectl get sc secrets.stackable.tech -o yaml | kubectl neat | cat -l yaml
kubectl df-pv
kubectl get svc,ep,endpointslices
kubectl get cm,secret
kubectl get cm minio -o yaml | kubectl neat | cat -l yaml
kubectl describe cm minio
kubectl get cm hive-metastore-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm hive -o yaml | kubectl neat | cat -l yaml
kubectl get cm postgresql-hive-extended-configuration -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-coordinator-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-coordinator-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm create-ny-taxi-data-table-in-trino-script -o yaml | kubectl neat | cat -l yaml
kubectl get cm superset-node-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm superset-init-db -o yaml | kubectl neat | cat -l yaml
kubectl get cm setup-superset-script -o yaml | kubectl neat | cat -l yaml
kubectl get secret minio -o yaml | kubectl neat | cat -l yaml
kubectl get secret minio-s3-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret postgresql-hive -o yaml | kubectl neat | cat -l yaml
kubectl get secret postgresql-superset -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-users -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-internal-secret -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-mapbox-api-key -o yaml | kubectl neat | cat -l yaml
kubectl get crd | grep stackable
kubectl explain trinoclusters
kubectl describe trinoclusters.trino.stackable.tech
kubectl get hivecluster,opacluster,s3connection
kubectl get supersetcluster,supersetdb
kubectl get trinocluster,trinocatalog
kubectl get hivecluster -o yaml | kubectl neat | cat -l yaml
kubectl get s3connection -o yaml | kubectl neat | cat -l yaml
kubectl get supersetcluster -o yaml | kubectl neat | cat -l yaml
kubectl get supersetdb -o yaml | kubectl neat | cat -l yaml
kubectl get trinocluster -o yaml | kubectl neat | cat -l yaml
kubectl get trinocatalog -o yaml | kubectl neat | cat -l yaml
# 배포 스택 정보 확인 : 바로 확인 하지 말고, 설치 완료 후 아래 확인 할 것 - Endpoint(접속 주소 정보), Conditions(상태 정보)
stackablectl stacklet list
┌────────────┬──────────────────────────────────┬────────────┬──────────────────────────────────────────────────┬─────────────────────────────────┐
│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
╞════════════╪══════════════════════════════════╪════════════╪══════════════════════════════════════════════════╪═════════════════════════════════╡
│ hive ┆ hive ┆ default ┆ ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa ┆ opa ┆ default ┆ ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset ┆ superset ┆ default ┆ external-superset http://43.202.112.25:31493 ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino ┆ trino ┆ default ┆ coordinator-metrics 15.164.129.120:30531 ┆ Available, Reconciling, Running │
│ ┆ ┆ ┆ coordinator-https https://15.164.129.120:31597 ┆ │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ grafana ┆ kube-prometheus-stack-grafana ┆ monitoring ┆ ┆ │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ minio ┆ minio-console ┆ default ┆ http http://3.35.25.225:30697 ┆ │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ prometheus ┆ kube-prometheus-stack-prometheus ┆ monitoring ┆ ┆ │
└────────────┴──────────────────────────────────┴────────────┴──────────────────────────────────────────────────┴─────────────────────────────────┘
# 배포 스택의 product 접속 계정 정보 확인 : 대부분 admin / adminadmin 계정 정보 사용
stackablectl stacklet credentials superset superset
stackablectl stacklet credentials minio minio-console # admin / adminadmin 계정 정보 출력 안됨... 아직은 rc 단계라 그런듯
# 배포 오퍼레이터 확인
stackablectl operator installed
┌───────────────────┬─────────┬─────────────────────┬──────────┬─────────────────────────────────────────┐
│ OPERATOR ┆ VERSION ┆ NAMESPACE ┆ STATUS ┆ LAST UPDATED │
╞═══════════════════╪═════════╪═════════════════════╪══════════╪═════════════════════════════════════════╡
│ commons-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:37:56.08217875 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:13.358512684 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:32.724016087 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:51.410402351 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:38:56.963602496 +0900 KST │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-11-19 10:39:15.346593878 +0900 KST │
└───────────────────┴─────────┴─────────────────────┴──────────┴─────────────────────────────────────────┘
기본 작업 CMD에서 미 설정 시 : 집 PC에서 직접 ENDPOINTS(워커 노드의 NodePort)로 접속 설정 → 아래 보안 그룹 추가 후 접속 가능
# 워커노드의 '#-nodegroup-ng1-remoteAccess' 보안 그룹에 자신의 집 공인IP 접속 허용 추가
NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)
aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr $(curl -s ipinfo.io/ip)/32
# 파일 생성 : minio 예시
cat <<EOT > minio-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
alb.ingress.kubernetes.io/group.name: study
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/ssl-redirect: "443"
alb.ingress.kubernetes.io/success-codes: 200-399
alb.ingress.kubernetes.io/target-type: ip
labels:
app: minio
name: minio
spec:
ingressClassName: alb
rules:
- host: minio.$MyDomain
http:
paths:
- backend:
service:
name: minio-console
port:
number: 9001
path: /*
pathType: ImplementationSpecific
EOT
# ingress 생성
kubectl apply -f minio-ingress.yaml
# 확인
kubectl get ingress -A
echo -e "minio URL = https://minio.$MyDomain"
Inspect data in S3 - Docs MinIO MinIO_Operator
- PRODUCT minio ENDPOINTS console-http 접속 : admin / adminadmin
- Click on the blue button Browse on the bucket demo and open the folders ny-taxi-data → raw
- As you can see the demo uploaded 1GB of parquet files, one file per month. The data contain taxi rides in New York City
- The demo loaded 2.5 years of taxi trip data from New York City with 68 million records and a total size of 1GB in parquet files.
- 필드 : 승차 및 하차 날짜/시간, 승차 및 하차 위치, 이동 거리, 항목별 요금, 요금 유형, 지불 유형 및 운전자가 보고한 승객 수 - Link
You can see the file size (and therefore the number of rides) decrease drastically because of the Covid-19 pandemic starting from 2020-03.
- Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
#
kubectl get svc,ep minio-console
# 데이터셋 다운로드 동작 확인
kubectl get job load-ny-taxi-data -o yaml | kubectl neat | cat -l yaml -p
...
spec:
containers:
- command:
- bash
- -c
- cd /tmp && for month in 2020-01 2020-02 2020-03 2020-04 2020-05 2020-06
2020-07 2020-08 2020-09 2020-10 2020-11 2020-12 2021-01 2021-02 2021-03
2021-04 2021-05 2021-06 2021-07 2021-08 2021-09 2021-10 2021-11 2021-12
2022-01 2022-02 2022-03 2022-04; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet
&& mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey)
$(cat /minio-s3-credentials/secretKey) && mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/; done
...
## 샘플 다운로드 : '연도-월' 만 바꿔서 다운로드 가능
curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_2023-01.parquet
# 추가 정보
kubectl get cm minio -o yaml | kubectl neat | cat -l yaml
kubectl describe cm minio
kubectl get secret minio -o yaml | kubectl neat | cat -l yaml
kubectl get secret minio-s3-credentials -o yaml | kubectl neat | cat -l yaml
- Use Trino Web Interface - Docs Trino Concept
- Trino, a query engine that runs at ludicrous speed : Fast distributed SQL query engine for big data analytics - Link
- Trino offers SQL access to the data within S3
- PRODUCT trino ENDPOINTS coordinator-https 접속 : admin / adminadmin
- When you start executing SQL queries you will see the queries getting processed here.
#
kubectl get svc,ep trino-coordinator
#
kubectl get job create-ny-taxi-data-table-in-trino -o yaml | kubectl neat | cat -l yaml
kubectl get trinocluster,trinocatalog
kubectl get trinocluster -o yaml | kubectl neat | cat -l yaml -p
kubectl get trinocatalog -o yaml | kubectl neat | cat -l yaml -p
...
spec:
connector: # hive, s3
hive:
metastore:
configMap: hive
s3:
reference: minio
...
#
kubectl get cm trino-coordinator-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-coordinator-default-catalog -o yaml | kubectl neat | cat -l yaml -p
...
data:
hive.properties: |
connector.name=hive
hive.metastore.uri=thrift\://hive-metastore-default-0.hive-metastore-default.default.svc.cluster.local\:9083
hive.s3.aws-access-key=${ENV\:CATALOG_HIVE_HIVE_S3_AWS_ACCESS_KEY}
hive.s3.aws-secret-key=${ENV\:CATALOG_HIVE_HIVE_S3_AWS_SECRET_KEY}
hive.s3.endpoint=http\://minio\:9000
hive.s3.path-style-access=true
hive.s3.ssl.enabled=false
hive.security=allow-all
...
kubectl get cm trino-worker-default -o yaml | kubectl neat | cat -l yaml
kubectl get cm trino-worker-default-catalog -o yaml | kubectl neat | cat -l yaml
kubectl get cm create-ny-taxi-data-table-in-trino-script -o yaml | kubectl neat | cat -l yaml
#
kubectl get secret trino-users -o yaml | kubectl neat | cat -l yaml
kubectl get secret trino-internal-secret -o yaml | kubectl neat | cat -l yaml
# 오퍼레이터 로깅 수준을 높여보자..
kubectl logs -n stackable-operators -l app.kubernetes.io/instance=trino-operator -f
# trino worker 2대로 증설
kubectl get trinocluster trino -o json | cat -l json -p
kubectl patch trinocluster trino --type='json' -p='[{"op": "replace", "path": "/spec/workers/roleGroups/default/replicas", "value":2}]'
# 다시 trino worker 1대로 축소
kubectl patch trinocluster trino --type='json' -p='[{"op": "replace", "path": "/spec/workers/roleGroups/default/replicas", "value":1}]'
Use Superset Web Interface - Docs Superset Connect_Trino
- Apache Superset™ is an open-source modern data exploration and visualization platform
- Superset gives the ability to execute SQL queries and build dashboards
- PRODUCT superset ENDPOINTS external-superset 접속 : admin / adminadmin
- On the top click on the tab Dashboards → Click on the dashboard called Taxi data.
- It might take some time until the dashboards renders all the included charts. ⇒ 다소 시간 걸림(새로 고침), Trino 같이 확인
# You can clearly see the impact of Covid-19 on the taxi business → 2020년 3월 이후부터 급격히 감소했음을 확인할 수 있다
#
kubectl get svc,ep superset-external
#
kubectl get job setup-superset -o yaml | kubectl neat | cat -l yaml
kubectl get job superset -o yaml | kubectl neat | cat -l yaml
kubectl get supersetcluster,supersetdb
kubectl get supersetcluster -o yaml | kubectl neat | cat -l yaml -p
kubectl get supersetdb -o yaml | kubectl neat | cat -l yaml -p
#
kubectl get cm superset-init-db -o yaml | kubectl neat | cat -l yaml -p
kubectl get cm superset-node-default -o yaml | kubectl neat | cat -l yaml -p
kubectl get cm setup-superset-script -o yaml | kubectl neat | cat -l yaml -p
#
kubectl get secret superset-credentials -o yaml | kubectl neat | cat -l yaml
kubectl get secret superset-mapbox-api-key -o yaml | kubectl neat | cat -l yaml
Execute arbitrary SQL statements
- Within Superset you can not only create dashboards but also run arbitrary SQL statements. On the top click on the tab SQL Lab → SQL Editor.
- On the left select the database Trino, the schema demo and set See table schema to ny_taxi_data.
select
format_datetime(tpep_pickup_datetime, 'YYYY/MM') as month,
count(*) as trips,
sum(total_amount) as sales,
avg(duration_min) as avg_duration_min
from ny_taxi_data
group by 1
order by 1
# How many taxi trips were there in the year 2021?
select
count(*) as trips
from ny_taxi_data
where year(tpep_pickup_datetime) = 2021
# What was the maximum amount of passengers?
select
max(passenger_count) as max_passenger_count
from ny_taxi_data;
# Returns 112 passengers. Well that’s weird. Let’s examine the passengers distribution.
select
passenger_count,
count(*) as frequency
from ny_taxi_data
group by 1
order by 1 desc
limit 100
# What was the highest tip (measured in percentage of the original fee) ever given?
select
total_amount as fee,
tip_amount as tip,
tip_amount / total_amount * 100 as tip_percentage
from ny_taxi_data
where total_amount > 0
order by 3 desc
limit 5
(옵션/참고) Where to go from here - Link
Demo 삭제
#
kubectl delete supersetcluster,supersetdb superset
kubectl delete trinocluster trino && kubectl delete trinocatalog hive
kubectl delete hivecluster hive
kubectl delete s3connection minio
kubectl delete opacluster opa
#
helm uninstall postgresql-superset
helm uninstall postgresql-hive
helm uninstall minio
#
kubectl delete job --all
kubectl delete pvc --all
#
kubectl delete cm create-ny-taxi-data-table-in-trino-script setup-superset-script trino-opa-bundle
kubectl delete secret minio-s3-credentials secret-provisioner-tls-ca superset-credentials superset-mapbox-api-key trino-users
kubectl delete sa superset-sa
# operator 삭제
stackablectl operator uninstall superset trino hive secret opa commons
# 남은 리소스 확인
kubectl get-all -n stackable-operators
Getting Started
Getting Started :: Stackable Documentation
Overview : Stackable is based on Kubernetes and uses this as the control plane to manage clusters.
- In this guide we will build a simple cluster with 3 services; Apache ZooKeeper, Apache Kafka and Apache NiFi.
- Installing Stackable Operators - Link
# [터미널1] 모니터링
watch -d "kubectl get pod -n stackable-operators"
# [터미널2] 설치
stackablectl release list
stackablectl release install -i commons -i secret -i zookeeper -i kafka -i nifi 23.7
[INFO ] Installing release 23.7
[INFO ] Installing commons operator in version 23.7.0
[INFO ] Installing kafka operator in version 23.7.0
[INFO ] Installing nifi operator in version 23.7.0
[INFO ] Installing secret operator in version 23.7.0
[INFO ] Installing zookeeper operator in version 23.7.0
# 설치 확인
helm list -n stackable-operators
stackablectl operator installed
kubectl get crd | grep stackable.tech
kubectl get pod
Deploying Stackable Services - Link
# 모니터링
watch -d kubectl get pod,job,svc,pvc
# Apache ZooKeeper
kubectl apply -f - <<EOF
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperCluster
metadata:
name: simple-zk
spec:
image:
productVersion: "3.8.1"
stackableVersion: "23.7"
clusterConfig:
tls:
serverSecretClass: null
servers:
roleGroups:
primary:
replicas: 1
config:
myidOffset: 10
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
name: simple-zk-znode
spec:
clusterRef:
name: simple-zk
EOF
# 설치 확인
kubectl get zookeepercluster,zookeeperznode
kubectl get pod,svc,ep,pvc -l app.kubernetes.io/instance=simple-zk
kubectl describe pod -l app.kubernetes.io/instance=simple-zk
# 실시간 로그 확인
kubectl logs -l app.kubernetes.io/instance=simple-zk -c zookeeper -f
- Apache Kafka : We will deploy an Apache Kafka broker that depends on the ZooKeeper service we just deployed.
- The zookeeperConfigMapName property below references the ZookeeperZnode (simple-kafka-znode) defined alongside it, whose clusterRef points to the namespace and name we gave to the ZooKeeper service deployed previously.
kubectl apply -f - <<EOF
---
apiVersion: kafka.stackable.tech/v1alpha1
kind: KafkaCluster
metadata:
name: simple-kafka
spec:
image:
productVersion: "3.4.0"
stackableVersion: "23.7"
clusterConfig:
zookeeperConfigMapName: simple-kafka-znode
tls:
serverSecretClass: null
brokers:
roleGroups:
brokers:
replicas: 3
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
name: simple-kafka-znode
spec:
clusterRef:
name: simple-zk
namespace: default
EOF
# 설치 확인
kubectl get kafkacluster,zookeeperznode
kubectl get pod,svc,ep,pvc -l app.kubernetes.io/instance=simple-kafka
kubectl describe pod -l app.kubernetes.io/instance=simple-kafka
# 실시간 로그 확인
kubectl logs -l app.kubernetes.io/instance=simple-kafka -c kafka -f
Kafka UI - 링크
#
helm repo add kafka-ui https://provectus.github.io/kafka-ui-charts
cat <<EOF > kafkaui-values.yml
yamlApplicationConfig:
kafka:
clusters:
- name: yaml
bootstrapServers: simple-kafka-broker-brokers:9092
auth:
type: disabled
management:
health:
ldap:
enabled: false
EOF
# 설치
helm install kafka-ui kafka-ui/kafka-ui -f kafkaui-values.yml
# 접속 확인
kubectl patch svc kafka-ui -p '{"spec":{"type":"LoadBalancer"}}'
kubectl annotate service kafka-ui "external-dns.alpha.kubernetes.io/hostname=kafka-ui.$MyDomain"
echo -e "kafka-ui Web URL = http://kafka-ui.$MyDomain"
kubectl apply -f - <<EOF
---
apiVersion: zookeeper.stackable.tech/v1alpha1
kind: ZookeeperZnode
metadata:
name: simple-nifi-znode
spec:
clusterRef:
name: simple-zk
---
apiVersion: v1
kind: Secret
metadata:
name: nifi-admin-credentials-simple
stringData:
username: admin
password: AdminPassword
---
apiVersion: nifi.stackable.tech/v1alpha1
kind: NifiCluster
metadata:
name: simple-nifi
spec:
image:
productVersion: "1.21.0"
stackableVersion: "23.7"
clusterConfig:
listenerClass: external-unstable
zookeeperConfigMapName: simple-nifi-znode
authentication:
method:
singleUser:
adminCredentialsSecret: nifi-admin-credentials-simple
sensitiveProperties:
keySecret: nifi-sensitive-property-key
autoGenerate: true
nodes:
roleGroups:
default:
replicas: 1
EOF
# 설치 확인 : job 완료까지 다소 시간 소요됨
kubectl get nificluster,zookeeperznode
kubectl get pod,svc,ep,pvc,job -l app.kubernetes.io/instance=simple-nifi
kubectl describe job.batch/simple-nifi-create-reporting-task-1-21-0
kubectl describe pod -l app.kubernetes.io/instance=simple-nifi
...
Args:
/stackable/python/create_nifi_reporting_task.py -n https://simple-nifi.default.svc.cluster.local:8443/nifi-api -u "$(cat /stackable/adminuser/username | grep -oP '((cn|dn|uid)=\K[^,]+|.*)' | head -n 1)" -p "$(cat /stackable/adminuser/password)" -v 1.21.0 -m 8081 -c /stackable/cert/ca.crt
...
# 실시간 로그 확인
kubectl logs -l app.kubernetes.io/instance=simple-nifi -c nifi -f
Testing your cluster - Link
# 설치확인
stackablectl stacklet list
┌────────────┬──────────────────────────────────┬────────────┬─────────────────────────────────────┬─────────────────────────────────┐
│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │
╞════════════╪══════════════════════════════════╪════════════╪═════════════════════════════════════╪═════════════════════════════════╡
│ kafka ┆ simple-kafka ┆ default ┆ metrics 3.35.25.225:32611 ┆ Available, Reconciling, Running │
│ ┆ ┆ ┆ kafka 3.35.25.225:32283 ┆ │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi ┆ simple-nifi ┆ default ┆ https https://43.202.112.25:32669 ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper ┆ simple-zk ┆ default ┆ ┆ Available, Reconciling, Running │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ grafana ┆ kube-prometheus-stack-grafana ┆ monitoring ┆ ┆ │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ prometheus ┆ kube-prometheus-stack-prometheus ┆ monitoring ┆ ┆ │
└────────────┴──────────────────────────────────┴────────────┴─────────────────────────────────────┴─────────────────────────────────┘
# Apache ZooKeeper
# ZooKeeper CLI shell
kubectl exec -i -t simple-zk-server-primary-0 -c zookeeper -- bin/zkCli.sh
------------------
# znodes 확인
# You can run the ls / command to see the list of znodes in the root path,
# which should include those created by Apache Kafka and Apache NiFi.
ls /
[znode-5fef78a9-71e1-4250-bc35-ced60243d60f, znode-b0bf14f8-a1f6-4b31-aaba-4f4bbc68767d, znode-c45d9efd-a071-4723-943a-79d5fe49a162, zookeeper]
quit
------------------
# Apache Kafka
# 토픽 생성
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --topic demo
...
Created topic demo.
...
# 토픽 확인
kubectl exec -it simple-kafka-broker-brokers-0 -c kafka -- bin/kafka-topics.sh --bootstrap-server localhost:9092 --list
...
demo
...
Apache NiFi : stackablectl stacklet list 출력에서 PRODUCT 가 nifi 인 행의 ENDPOINTS https 주소로 웹 접속 (계정 : admin / AdminPassword)
# NiFi admin 계정의 암호 확인
kubectl get secrets nifi-admin-credentials-simple -o jsonpath="{.data.password}" | base64 -d && echo
AdminPassword
배포한 리소스 삭제
# Apache NiFi 삭제
kubectl delete nificluster simple-nifi && kubectl delete zookeeperznode simple-nifi-znode
# kafka-ui 삭제
helm uninstall kafka-ui
# Apache Kafka 삭제
kubectl delete kafkacluster simple-kafka && kubectl delete zookeeperznode simple-kafka-znode
# Apache ZooKeeper 삭제
kubectl delete zookeepercluster simple-zk && kubectl delete zookeeperznode simple-zk-znode
# secret, pvc 삭제
kubectl delete secret nifi-admin-credentials-simple nifi-sensitive-property-key secret-provisioner-tls-ca
kubectl delete pvc --all
# operator 삭제
stackablectl operator uninstall nifi kafka zookeeper secret commons
# 남은 리소스 확인
kubectl get-all -n stackable-operators
Stackable Operator
Operator : Operators manage the individual data products of the Stackable Data Platform - Link
# list
stackablectl operator list
┌───────────┬────────────────────────────────────────────────────────────────────────────────────────┐
│ Operator ┆ Stable versions │
╞═══════════╪════════════════════════════════════════════════════════════════════════════════════════╡
│ airflow ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ commons ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.4.0, 0.3.0, 0.2.1, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ druid ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hbase ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hdfs ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.3.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ kafka ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ listener ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.1, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.11.0, 0.10.0, 0.9.0, 0.8.0, 0.7.0, 0.6.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ spark-k8s ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.8.0, 0.7.0, 0.6.0, 0.5.0, 0.4.0, 0.3.1, 0.3.0, 0.2.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper ┆ 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.12.0, 0.11.0, 0.10.0, 0.9.0, 0.8.0, 0.7.0, 0.6.0 │
└───────────┴────────────────────────────────────────────────────────────────────────────────────────┘
# 개별 정보 확인
stackablectl operator describe airflow
Operator airflow
Stable versions 23.7.0, 23.4.1, 23.4.0, 23.1.0, 0.6.0, 0.5.0, 0.4.0, 0.3.0, 0.2.0, 0.1.0
Test versions 0.0.0-pr317, 0.0.0-pr316, 0.0.0-pr315, 0.0.0-pr314, 0.0.0-pr312, 0.0.0-pr311, 0.0.0-pr310, 0.0.0-pr308, 0.0.0-pr307, 0.0.0-pr305, 0.0.0-pr304, 0.0.0-pr303
Dev versions 0.0.0-dev
# Install operator
# [터미널1] 모니터링
watch -d kubectl get pod,job,svc,pvc
# [터미널2] 설치
stackablectl operator install airflow commons secret
[INFO ] Installing airflow operator
[INFO ] Installing commons operator
[INFO ] Installing secret operator
# 확인
stackablectl operator installed
OPERATOR VERSION NAMESPACE STATUS LAST UPDATED
airflow 0.0.0-dev default deployed 2023-08-24 08:35:44.418557107 +0000 UTC
commons 0.0.0-dev default deployed 2023-08-24 08:36:07.429509054 +0000 UTC
secret 0.0.0-dev default deployed 2023-08-24 08:36:30.002822765 +0000 UTC
kubectl get crd | grep stackable.tech
kubectl get pod
NAME READY STATUS RESTARTS AGE
airflow-operator-deployment-7cd445fd69-mpwbb 1/1 Running 0 3m16s
commons-operator-deployment-577f88c697-478f6 1/1 Running 0 2m53s
secret-operator-daemonset-9djvc 3/3 Running 0 2m30s
secret-operator-daemonset-9x7np 3/3 Running 0 2m30s
secret-operator-daemonset-gsm8j 3/3 Running 0 2m30s
# Uninstall operator
stackablectl operator uninstall airflow commons secret
stackablectl operator installed
Release : A release is a bundle of operators of a specific stable version
- The stable versions of the operators are tested and proven to work hand in hand. If you want to install a single individual operator, have a look at the Operator command.
# 릴리스 목록 및 특정 릴리스(23.7) 상세 정보 확인
stackablectl release list
stackablectl release describe 23.7
Release 23.7
Release date 2023-07-26
Description Sixth release focusing on resources and pod overrides
Included products
┌───────────┬──────────────────┐
│ Product ┆ Operator version │
╞═══════════╪══════════════════╡
│ airflow ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ commons ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ druid ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hbase ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hdfs ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hive ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ kafka ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ listener ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ nifi ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ opa ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ secret ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ spark-k8s ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ superset ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ trino ┆ 23.7.0 │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ zookeeper ┆ 23.7.0 │