ES Common Issues
Contents
1. Modify the shard count of an index
2. ES write timeouts
3. Reindexing
4. Common ES commands
5. Huawei Cloud ES with security mode enabled: client usage
6. Operations commands
7. Adding a new ES node
8. Fixing "too many open files" caused by translog
1. Modify the shard count of an index
#index_patterns is the index-name pattern the template matches
#"order": 1 - a template with a higher order overrides one with a lower order; e.g. the default ES template is usually given order 0, so with order 1 here all of this template's settings override the order-0 ones
#number_of_shards is the number of primary shards
#number_of_routing_shards is the maximum number of shards the index can later be split into; it must be a multiple of number_of_shards (8x is a reasonable choice)
PUT _template/gakx_vehicle1_new
{
"index_patterns": [
"gakx.vehicle1_*"
],
"order": 1,
"settings": {
"index": {
"number_of_shards": 15,
"number_of_routing_shards": 120
}
}
}
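As a hedged aside (not from the original notes): number_of_routing_shards matters because it caps how far an index created from this template can later be split with the _split API. The index and target names below are hypothetical examples.
#Verify the template was stored
GET _template/gakx_vehicle1_new
#The source index must be write-blocked before it can be split
PUT gakx.vehicle1_20190808/_settings
{
"index.blocks.write": true
}
#Split from 15 shards up to at most number_of_routing_shards (120)
POST gakx.vehicle1_20190808/_split/gakx.vehicle1_20190808_split
{
"settings": {
"index.number_of_shards": 120
}
}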
2. ES write timeouts
ES write timeouts can have several causes.
#First, check the cluster health
get _cluster/health
#Then check the bulk or write thread-pool queues
# es 6.1 - 6.3
GET _cat/thread_pool/bulk?v&h=ip,port,name,type,active,size,queue,queue_size,rejected,largest,completed
# es 6.7
GET _cat/thread_pool/write?v&h=ip,port,name,type,active,size,queue,queue_size,rejected,largest,completed
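If the pool shows rejected requests climbing, one common mitigation (a sketch, not part of the original notes; values are illustrative) is to enlarge the queue in elasticsearch.yml on each data node and restart it. Set only the name that exists in your version:
# elasticsearch.yml
# ES 6.1/6.3 (pool is named bulk)
thread_pool.bulk.queue_size: 1000
# ES 6.7 (pool is named write)
thread_pool.write.queue_size: 1000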
3. Reindexing
#Reindex with _reindex
POST _reindex
{
"source": {
"index": "fjst_gakx.bill1_20190808",
"size": 5000
},
"dest": {
"index": "fjst_gakx.bill1_20190808new"
}
}
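For large indices it is often better to run _reindex asynchronously and poll the task API; a minimal sketch (the task id is a placeholder for the id returned by the first call):
POST _reindex?wait_for_completion=false
{
"source": {
"index": "fjst_gakx.bill1_20190808",
"size": 5000
},
"dest": {
"index": "fjst_gakx.bill1_20190808new"
}
}
#Check progress / completion with the returned task id
GET _tasks/oTUltX4IQMOUUVeiohTt8A:12345
#Cancel it if needed
POST _tasks/oTUltX4IQMOUUVeiohTt8A:12345/_cancel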
#Indices and aliases have a many-to-many relationship:
#an index can have several aliases, and an alias can point to several indices.
#Set an alias
PUT /my_index_v1/_alias/my_index
#List all indices behind the alias my_index
GET /*/_alias/my_index
#List all aliases of this index
GET /my_index_v1/_alias/*
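Aliases can also be switched atomically with the _aliases API, which is the usual zero-downtime way to move clients from an old index to a new one; a sketch (my_index_v2 is a hypothetical new index):
POST _aliases
{
"actions": [
{"remove": {"index": "my_index_v1", "alias": "my_index"}},
{"add": {"index": "my_index_v2", "alias": "my_index"}}
]
}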
#Set the replica count
PUT 4gyd.post_20191029/_settings
{
"index": {
"number_of_replicas": 0
}
}
4. Common ES commands
#Check how text is tokenized (analyze)
post idx_z06crjzjcs_j_qz_wg_my_1205/_analyze
{
"analyzer":"ik_max_word",
"text":"黄小明在台湾abcdedf"
}
post _analyze
{
"analyzer":"ik_max_word",
"text":"黄小明在台湾abcdedf"
}
#Check segment status
get idx_z22czrk_gab_czrk_jbxx_new_zl_my_1218/_segments
#Force-merge segments
post idx_z22czrk_gab_czrk_jbxx_new_zl_my_1218/_forcemerge?only_expunge_deletes=false&max_num_segments=1&flush=true&pretty
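After the merge completes, the per-shard segment counts can be checked to confirm it worked; a quick sketch:
GET _cat/segments/idx_z22czrk_gab_czrk_jbxx_new_zl_my_1218?v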
5. Huawei Cloud ES with security mode enabled: client usage
1. Using Kibana.
Kibana must be the OSS distribution, e.g. kibana-oss-xxx-linux-x86_64.tar.gz
#The following must be changed in kibana.yml
elasticsearch.ssl.verificationMode: none
#bWVpeWE6QWRtaW5AMTIzNDU= is base64(username:password)
elasticsearch.customHeaders: {"custom-proxy-username":"bWVpeWE6QWRtaW5AMTIzNDU="}
2. Direct access from an application (features without a configured resource directory, such as name analysis, can use this method).
1) Change http to https in the URL
2) Add a header to the request
#bWVpeWE6QWRtaW5AMTIzNDU= is base64(username:password)
"custom-proxy-username":"bWVpeWE6QWRtaW5AMTIzNDU="
6. Operations commands
#Check disk usage per node
get _cat/allocation?v
#Check cluster health
#curl -XGET "http://192.168.1.1:9200/_cluster/health?pretty"
get _cluster/health
{
"cluster_name": "elasticsearch_cluster",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 311,
"number_of_data_nodes": 231,
"active_primary_shards": 20161,
"active_shards": 38804,
"relocating_shards": 0,
"initializing_shards": 60,
"unassigned_shards": 13,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 6,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 53955,
"active_shards_percent_as_number": 99.81222830979758
}
#If there are unassigned shards, inspect the shard allocation
#Check shard allocation: INITIALIZING means the shard is still initializing, UNASSIGNED means it has not been allocated yet
get _cat/shards?v
4gdx.post_20191126 0 r UNASSIGNED
#If shards remain unassigned, find out why
#Explain why allocation failed
get _cluster/allocation/explain
{
"index":"4gyd.post_20191126",
"shard":0,
"primary":false
}
#Check whether GC usage has reached 100%
/opt/shell/jstat.sh /usr/local/elk/es1
Watch the O (old generation) column
#Find the reason and fix it
#Cause 1: allocation explain shows "temporarily throttled" - the recovery concurrency limit was exceeded
#Fix: raise the recovery settings to speed up initialization
put _cluster/settings
{
"transient":{
"cluster.routing.allocation.node_concurrent_recoveries":60
}
}
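Once recovery has caught up, the transient override can be returned to its default by writing null; a sketch:
put _cluster/settings
{
"transient":{
"cluster.routing.allocation.node_concurrent_recoveries":null
}
}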
#Cause 2: "Too many open files" - too many file handles
#See section 8, "Fixing too many open files caused by translog"
#Cause 3: "allocate_explanation": "cannot allocate because all found copies of the shard are either stale or corrupt"
#This means the shard data is damaged or out of sync. Find the nodes whose copies report "in_sync": false, then run allocation explain again. If the problem persists, change allocate_stale_primary to allocate_empty_primary, which allows allocating an empty shard (see the sketch after the command below).
POST _cluster/reroute
{
"commands": [
{"allocate_stale_primary": {
"index": "t_sys_log_sga_20200709",
"shard": 1,
"node": "LN82kzp",
"accept_data_loss":true
}}
]
}
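If the stale copy is unusable and you accept losing the shard's data, the fallback mentioned above is allocate_empty_primary; a sketch reusing the same example index and node:
POST _cluster/reroute
{
"commands": [
{"allocate_empty_primary": {
"index": "t_sys_log_sga_20200709",
"shard": 1,
"node": "LN82kzp",
"accept_data_loss": true
}}
]
}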
#Tune the rebalance settings to speed up rebalancing
put _cluster/settings
{
"transient":{
"cluster.routing.allocation.cluster_concurrent_rebalance":60,
"indices.recovery.max_bytes_per_sec":"1gb"
}
}
#View all cluster settings
GET _cluster/settings
{
"persistent": {
"indices": {
"breaker": {
"fielddata": {
"limit": "20%"
},
"request": {
"limit": "10%"
},
"total": {
"limit": "70%"
}
}
}
},
"transient": {
"cluster": {
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"cluster_concurrent_rebalance": "2",
"node_concurrent_recoveries": "60",
"node_initial_primaries_recoveries": "4",
"enable": "all"
}
}
}
}
}
#List indices
get _cat/indices
#List nodes (nodes that are down do not appear here)
GET _cat/nodes?v
#Pending tasks
get _cat/pending_tasks?v
#Retry allocation of shards that previously failed
post _cluster/reroute?retry_failed=true
#Remove the read-only block
put */_settings
{
"index.blocks.read_only_allow_delete":null
}
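The read_only_allow_delete block is normally applied by ES itself when a node crosses the flood-stage disk watermark, so free disk space first; the watermark can also be checked or, with care, raised (a sketch, values are illustrative):
#Check disk usage per node
get _cat/allocation?v
#Flood-stage watermark that triggers the block (default 95%)
put _cluster/settings
{
"transient":{
"cluster.routing.allocation.disk.watermark.flood_stage":"97%"
}
}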
#Reduce imbalance by limiting how many shards of one index can be placed on a single node
put */_settings
{
"index.routing.allocation.total_shards_per_node":2
}
#Typical steps when restarting a cluster with many nodes
#Disable shard allocation
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "none"
}
}
}
}
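Optionally, before killing the processes, a synced flush (an ES 6.x API, not in the original notes) makes shard recovery after the restart much faster:
POST _flush/synced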
#Kill the ES processes
#Start the ES cluster
#Re-enable shard allocation
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "all"
}
}
}
}
7. Adding a new ES node
#There are two cases.
#Case 1: deploy on a brand-new node.
1. Set up SSH trust, initialize the system parameters, create the es user, create the data directories on the /data disk, and grant ownership to the es user (see the installation guide).
2. On the existing nodes, modify the configuration to add the new node's IP address, e.g.:
vim /usr/local/elk/es1/config/elasticsearch.yml
discovery.zen.ping.unicast.hosts: ["xxxx","xxxx"]
3. Distribute the software: scp it to the new machine.
4. SSH to the new node and modify its configuration file (a fuller example follows below):
vim /usr/local/elk/es1/config/elasticsearch.yml
network.host: xxx.xxx.xxx
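Beyond network.host, the new node's elasticsearch.yml usually needs at least the following to join the cluster; a hedged sketch in which every value is a placeholder that must match your existing deployment:
cluster.name: elasticsearch_cluster
node.name: es-new-node-1
path.data: /data/es1/data
path.logs: /data/es1/logs
network.host: x.x.x.x
http.port: 9200
transport.tcp.port: 9300
discovery.zen.ping.unicast.hosts: ["x.x.x.x:9300","x.x.x.x:9300"]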
5. Restart the ES cluster:
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "none"
}
}
}
}
#Kill the ES processes
#Start the ES cluster
#Re-enable shard allocation
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "all"
}
}
}
}
#Case 2: add another instance on an existing node.
1. Copy the software, e.g.:
cp -r /usr/local/elk/es1 /usr/local/elk/es2
2. Modify the configuration to add the new address, e.g.:
#On the existing instance, add the new instance's port to discovery.zen.ping.unicast.hosts
vim /usr/local/elk/es1/config/elasticsearch.yml
discovery.zen.ping.unicast.hosts: ["x.x.x.x:9300","x.x.x.x:9302"]
#On the new instance, add the new port to discovery.zen.ping.unicast.hosts and set http.port and transport.tcp.port
vim /usr/local/elk/es2/config/elasticsearch.yml
http.port: 9202
transport.tcp.port: 9302
discovery.zen.ping.unicast.hosts: ["x.x.x.x:9300","x.x.x.x:9302"]
3. Distribute the software: scp it to the new machine.
4. Restart the ES cluster:
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "none"
}
}
}
}
#Kill the ES processes
#Start the ES cluster
#Re-enable shard allocation
PUT _cluster/settings
{
"persistent": {
"cluster": {
"routing": {
"allocation.enable": "all"
}
}
}
}
8. Fixing "too many open files" caused by translog
#Check the number of file handles in use on each machine
/opt/FusionInsight_SetupTool/preinstall/tools/cluster/clustercmd.sh "cat /proc/sys/fs/file-nr"
"cat /proc/sys/fs/file-nr"
#If any machine shows more than 300,000, it should be dealt with.
vi /opt/check_fd.sh
# Dump the open file descriptors of every Elasticsearch process to /opt/fd_<pid>.txt
ps -ef|grep java|grep org.elasticsearch.bootstrap.Elasticsearch|grep -v grep|awk '{print $2}'|while read line;do cd /proc/$line/fd;ls -l > /opt/fd_$line.txt;done
# For every process holding more than 5000 descriptors, print the translog index UUIDs with more than 1000 open handles each
wc -l /opt/fd_*|grep -v 总用量|awk '{if($1>5000) print $2}'|while read line;do cat $line|grep translog|awk -F'/' '{print $9}'|sort|uniq -c|awk '{if($1>1000)print $0}';done
#Distribute it to the other machines
/opt/FusionInsight_SetupTool/preinstall/tools/cluster/clusterscp.sh put /opt/check_fd.sh /opt/check_fd.sh
#Run the check script to find out which index is responsible
/opt/FusionInsight_SetupTool/preinstall/tools/cluster/clustercmd.sh "sh /opt/check_fd.sh"
47845 d6AF_i8_SWa3cV5kBybpXg
87405 57icq0x4S7KFsxe4k7sWrg
#List all indices and their corresponding UUIDs
#Change the address to the right one, e.g. for the large cluster: x.x.x.x:24148
curl -XGET "http://163.1.6.3:24148/_cat/indices?v" > /opt/all_index.txt
#This IP is the elasticsearch.url from kibana.yml, located on the control node under /usr/local/elk/kibana-6.1.3-8601/config
[root@hosts01 ~]# cat /opt/all_index.txt |grep h-sdnAM5Q32AoD1fTLIq0Q
green open 4gyd.log_20211215 h-sdnAM5Q32AoD1fTLIq0Q 5 1 37255447
#For the problematic index, first set the replica count to 0,
PUT 4gdx.post_20211211/_settings
{
"number_of_replicas": 0
}
#then set it back to 1 so the replicas are rebuilt and the stale translog handles are released
PUT 4gdx.post_20211211/_settings
{
"number_of_replicas": 1
}