download:Go语言开发分布式任务调度 轻松搞定高性能Crontab
Golang因其强大的协程机制,现已广泛应用于解决分布式环境下的高并发问题。本门课程将结合Etcd与MongoDB实现一个基于Master-Worker分布式架构的任务调度系统。你不仅将掌握Golang的工程实践能力,而且会收获诸如CAP、Raft等分布式经典理论与架构经验,“工程能力”与“知识体系”齐头并进,实现华丽质变。
适合人群
希望转型Go语言开发的同学
需要提升Go语言的开发经验的同学
技术储备要求
熟悉Go语言基础语法,具备一定开发经验更佳
了解Linux Shell、Github、MySQL的简单用法
Ansible 部署 Prometheus + node-exporter
简单部署 Prometheus 监控系统
yum 安装 ansible
yum install ansible
ansible 的 hosts 文件
# Ansible inventory: one group per component deployed by the playbooks.
[alertmanagers]
10.9.119.1

[prometheus]
10.9.119.1

[node-exporter]
10.9.119.1
10.9.119.2
10.9.119.3
文件层次格式如下:
![]()
prometheus
prometheus.yml
---
# Playbook for the [prometheus] inventory group: unpacks the Prometheus
# 2.24.0 tarball into /opt/prometheus, installs the systemd unit, the main
# config and the node alert rules, then starts and enables the service.
- hosts: prometheus
  remote_user: root
  tasks:
    - name: create dir
      file:
        path: /opt/prometheus
        state: directory  # created if it does not exist yet
    - name: copy file
      unarchive:
        src: prometheus-2.24.0.linux-amd64.tar.gz
        dest: /opt/prometheus
    - name: create link
      file:
        src: /opt/prometheus/prometheus-2.24.0.linux-amd64
        dest: /opt/prometheus/prometheus
        state: link  # symbolic link to the versioned directory
    - name: copy service file
      template:
        src: prometheus.service.j2
        dest: /usr/lib/systemd/system/prometheus.service
    - name: copy config yaml
      template:
        src: prometheus.yml.j2
        dest: /opt/prometheus/prometheus/prometheus.yml
      notify:
        - restart prometheus
    - name: create rules dir
      file:
        path: /opt/prometheus/prometheus/rules
        state: directory
    # node.yml contains special characters, so it is shipped with copy
    # instead of being rendered by template.
    - name: copy rules yaml
      copy:
        src: node.yml
        dest: /opt/prometheus/prometheus/rules/node.yml
      notify:  # triggers the handler defined under handlers
        - restart prometheus
    - name: start prometheus
      service:
        name: prometheus
        state: started
        enabled: true
  handlers:
    - name: restart prometheus
      service:
        name: prometheus
        state: restarted
prometheus.service.j2 能够运用copy模块,这里运用了template
# systemd unit that runs the Prometheus binary from /opt/prometheus/prometheus.
[Unit]
Description=Prometheus
Documentation=
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/prometheus/prometheus
ExecStart=/opt/prometheus/prometheus/prometheus
# Reload sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# NOTE(review): stop sends SIGKILL, which cannot be caught by the process;
# confirm that a forced kill is intended here.
ExecStop=/bin/kill -KILL $MAINPID
KillMode=control-group
Restart=on-failure
RestartSec=3s

[Install]
WantedBy=multi-user.target
prometheus.yml.j2
# Global settings
global:
  scrape_interval: 30s  # interval between scrapes
  evaluation_interval: 30s  # interval between rule evaluations
  query_log_file: ./promql.log

# Alerting settings
alerting:
  alertmanagers:  # Alertmanager configuration
    - static_configs:  # static Alertmanager configuration
        - targets:  # Alertmanager endpoints that alerts are sent to
{% for am_host in groups['alertmanagers'] %}
            - {{ am_host }}:9093
{% endfor %}

rule_files:  # rule file configuration
  - "rules/*.yml"

scrape_configs:  # scrape configuration
  - job_name: 'prometheus'  # job name; classifies the scrape targets
    static_configs:  # static scrape target configuration
      - targets:
{% for prom_host in groups['prometheus'] %}
          - "{{ prom_host }}:9090"  # scrape target
{% endfor %}
  - job_name: "node"
    static_configs:
      - targets:
{% for node_host in groups['node-exporter'] %}
          - "{{ node_host }}:9100"
{% endfor %}
node-rules规则文件node.yml
# Alerting rules for the scraped hosts: target down, filesystem usage,
# memory usage and CPU usage.
groups:
  - name: node.rules  # alert rule group name
    rules:
      - alert: node is Down
        expr: up == 0
        for: 30s  # fire once the target has been unreachable for 30 seconds
        labels:
          severity: serious  # custom label
        annotations:
          summary: "Instance {{ $labels.instance }} down"  # custom summary
          # custom detailed description; the duration matches `for` above
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."
      - alert: node Filesystem
        expr: 100 - (node_filesystem_free_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"} * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: {{$labels.mountpoint }} 分区运用过高"
          description: "{{$labels.instance}}: {{$labels.mountpoint }} 分区运用大于 80% (当前值: {{ $value }})"
      - alert: node Memory
        expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: 内存运用过高"
          description: "{{$labels.instance}}: 内存运用大于 80% (当前值: {{ $value }})"
      - alert: node CPU
        expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{$labels.instance}}: CPU运用过高"
          description: "{{$labels.instance}}: CPU运用大于 80% (当前值: {{ $value }})"
node-exporter
node-exporter.yml
---
# Playbook for the [node-exporter] inventory group: unpacks the
# node_exporter 1.0.1 tarball into /opt/prometheus, installs the systemd
# unit, then restarts and enables the service.
- hosts: node-exporter
  remote_user: root
  tasks:
    - name: create dir
      file:
        path: /opt/prometheus
        state: directory
    - name: copy file
      unarchive:
        src: node_exporter-1.0.1.linux-amd64.tar.gz
        dest: /opt/prometheus
    - name: create link
      file:
        src: /opt/prometheus/node_exporter-1.0.1.linux-amd64
        dest: /opt/prometheus/node_exporter
        state: link
    - name: copy service file
      template:
        src: node_exporter.service.j2
        dest: /usr/lib/systemd/system/node_exporter.service
    # This play defines no handler, so the service is restarted on every run.
    - name: start node_exporter
      service:
        name: node_exporter
        state: restarted
        enabled: true
node_exporter.service.j2
# systemd unit that runs node_exporter from /opt/prometheus/node_exporter.
[Unit]
Description=Node Exporter
Documentation=
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/prometheus/node_exporter/
ExecStart=/opt/prometheus/node_exporter/node_exporter
# NOTE(review): stop sends SIGKILL, which cannot be caught by the process;
# confirm that a forced kill is intended here.
ExecStop=/bin/kill -KILL $MAINPID
KillMode=control-group
Restart=on-failure
RestartSec=3s

[Install]
WantedBy=multi-user.target
alertmanager
alertmanager.yaml
---
# Playbook for the [alertmanagers] inventory group: unpacks the
# Alertmanager 0.21.0 tarball into /opt/prometheus, installs the systemd
# unit and the rendered config, then starts and enables the service.
- hosts: alertmanagers
  remote_user: root
  tasks:
    - name: create dir
      file:
        path: /opt/prometheus
        state: directory
    - name: copy file
      unarchive:
        src: alertmanager-0.21.0.linux-amd64.tar.gz
        dest: /opt/prometheus
    - name: create link
      file:
        src: /opt/prometheus/alertmanager-0.21.0.linux-amd64
        dest: /opt/prometheus/alertmanager
        state: link
    - name: copy service file
      template:
        src: alertmanager.service.j2
        dest: /usr/lib/systemd/system/alertmanager.service
    - name: copy config yaml
      template:
        src: alertmanager.yml.j2
        dest: /opt/prometheus/alertmanager/alertmanager.yml
      notify:
        - restart alertmanager
    # Only ensure the service is running; restarts on a config change are
    # left to the notified handler so an unchanged run does not bounce it.
    - name: start server
      service:
        name: alertmanager
        state: started
        enabled: true
  handlers:
    - name: restart alertmanager
      service:
        name: alertmanager
        state: restarted
alertmanager.service.j2
# systemd unit that runs Alertmanager from /opt/prometheus/alertmanager.
[Unit]
Description=AlertManager
Documentation=
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/prometheus/alertmanager/
ExecStart=/opt/prometheus/alertmanager/alertmanager
# Reload sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# NOTE(review): stop sends SIGKILL, which cannot be caught by the process;
# confirm that a forced kill is intended here.
ExecStop=/bin/kill -KILL $MAINPID
KillMode=control-group
Restart=on-failure
RestartSec=3s

[Install]
WantedBy=multi-user.target
alertmanager.yml.j2 这里运用了邮箱告警
# Alertmanager configuration: e-mail notifications with a default receiver
# and two sub-routes ('db' and 'web').
global:
  # NOTE(review): presumably the time after which an alert that is no
  # longer updated is declared resolved; confirm against the Alertmanager
  # documentation.
  resolve_timeout: 5m
  smtp_from: '123456789@qq.com'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_auth_username: '123456789@qq.com'  # mailbox account
  # Authorization code, not the QQ account password.
  # NOTE(review): credential is stored in plain text in this template;
  # consider supplying it from a vaulted variable.
  smtp_auth_password: 'bcvizcgqbgojjjeb'
  smtp_require_tls: false  # port 465 is used, so this is set to false

route:
  group_by:  # label used as the grouping key
    - 'alertname'
  group_wait: 10s  # wait time for a group
  group_interval: 10s  # interval between notifications for a group
  repeat_interval: 24h  # interval before the same alert is sent again
  # Default receiver: alerts that match none of the sub-routes below stay
  # at the root node and are sent to 'default-receiver'.
  receiver: 'default-receiver'
  routes:
    - receiver: 'db'
      group_wait: 10s
      match_re:  # regular-expression match on alert labels
        # alerts with service=mysql or service=redis go to the 'db' receiver
        service: mysql|redis
    - receiver: 'web'
      group_by:  # product and environment labels are the grouping key
        - product
        - environment
      match:
        team: frontend  # alerts labelled team=frontend go to 'web'

receivers:
  - name: 'default-receiver'
    email_configs:
      - to: '123456789@qq.com'  # alert recipient
  - name: 'db'  # alerts delivered by e-mail
    email_configs:
      - to: '111111111@qq.com'
  - name: 'web'
    email_configs:
      - to: '222222222@qq.com'

# Inhibition: when both severities fire, the more severe alert suppresses
# the warning-level one so only the severe alert is sent.
inhibit_rules:
  - source_match:
      # NOTE(review): the node rules shown earlier in this document label
      # alerts 'serious' or 'warning', never 'critical'; confirm this
      # source matcher is intended.
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal:
      - 'alertname'
      - 'dev'
      - 'instance'