其他分享
首页 > 其他分享 > MPI可靠性设计

MPI可靠性设计

作者:互联网

导入库函数

import subprocess
import numpy as np
import os

 

同步每个节点暂停状态

n = 100
# Load the checkpoint array from the current working directory.
array = np.load('zong.npy')

# The first two entries of the last checkpoint row are the restart flags.
# NOTE(review): flag2 looks like the current iteration count and flag1 the
# value it has to reach (the restart loop runs while flag2 < flag1) - confirm.
flag = array[-1][:2]
flag1, flag2 = int(flag[0]), int(flag[1])

def bijiao():
    """Find the reachable node whose checkpoint has progressed the furthest.

    Every host listed in ``cpu_ip.txt`` is pinged. For each host that
    answers, its ``/home/mpiuser/mpii/zong.npy`` is copied to
    ``/home/mpiuser/chaxun.npy`` with ``scp`` and the second entry of the
    last row is compared against the best value seen so far.

    Side effects:
        Hosts that do not answer the ping are appended to the module-level
        ``rmip`` list (once each) so the caller can skip them.

    Returns:
        tuple: ``(maxflag, maxnode)`` - the largest value found and the host
        it came from. ``(-100, -1)`` when no host yielded a checkpoint.
    """
    maxflag = -100
    maxnode = -1

    # Read into a local list. Appending to the module-level IP list here
    # duplicated every host the caller had already loaded, so each node was
    # pinged twice and later received the checkpoint twice.
    with open("cpu_ip.txt") as host_file:
        hosts = [line.strip() for line in host_file if line.strip()]

    for ip in hosts:
        # subprocess.call returns the exit status: non-zero means no answer.
        if subprocess.call('timeout 5s ping ' + ip + ' -c3', shell=True):
            if ip not in rmip:
                rmip.append(ip)
            continue

        # A failed copy leaves the previous node's chaxun.npy in place;
        # loading it would credit stale data to this host, so skip the host.
        if os.system('scp mpiuser@'+str(ip)+':/home/mpiuser/mpii/zong.npy  /home/mpiuser/chaxun.npy'):
            continue

        a = np.load('/home/mpiuser/chaxun.npy')
        f = a[-1][0:2]
        b = f[1]   # current iteration count of this node
        if b > maxflag:
            maxflag = b
            maxnode = ip
    return maxflag, maxnode

 

确定节点当前状态并继续运算

# Restart loop: while the checkpoint says the job is unfinished, rebuild the
# MPI host file from the nodes that still answer, spread the newest
# checkpoint to all of them and relaunch the job.
while flag2 < flag1:
    IP = []     # every host listed in cpu_ip.txt
    rmip = []   # hosts that did not answer ping and must be left out
    with open("cpu_ip.txt") as host_file:
        for line in host_file:
            line = line.strip()
            if line:  # ignore blank lines instead of pinging an empty host
                IP.append(line)
    for ip in IP:
        # subprocess.call returns the exit status: non-zero means no answer.
        if subprocess.call('timeout 5s ping ' + ip + ' -c3', shell=True):
            rmip.append(ip)

    # Distribute the processes over the reachable hosts.
    pro_sum = flag1**2 + 1      # total number of MPI processes
    new_IP = [[ip, 0] for ip in IP if ip not in rmip]
    if not new_IP:
        # With zero reachable hosts the distribution below has nothing to
        # divide by; the previous subtract-in-a-loop version spun forever.
        raise SystemExit("no reachable node in cpu_ip.txt; cannot restart the MPI job")
    # Every host gets the same base share; the first `extra` hosts take one
    # more so that the shares add up to pro_sum.
    base, extra = divmod(pro_sum, len(new_IP))
    for idx, entry in enumerate(new_IP):
        entry[1] = base + (1 if idx < extra else 0)

    with open("mpi_config.txt", "w") as f:
        print("open")
        for host, count in new_IP:
            f.write("{}:{}\n".format(host, count))

    maxflag, maxnode = bijiao()
    print('maxflag', maxflag)
    print('maxnode', maxnode)

    # bijiao() returns -1 as the node when no checkpoint could be fetched;
    # in that case there is nothing to pull and the local copy is kept.
    if maxnode != -1:
        os.system('scp   mpiuser@' + str(maxnode) + ':/home/mpiuser/mpii/zong.npy  /home/mpiuser/mpii/')  # pull newest checkpoint

    # Push the newest checkpoint to every other reachable host, once each
    # (IP may contain repeated entries after bijiao() has run).
    pushed = set()
    for ip in IP:
        if ip in rmip or ip == maxnode or ip in pushed:
            continue
        pushed.add(ip)
        os.system('scp  /home/mpiuser/mpii/zong.npy  mpiuser@' + str(ip) + ':/home/mpiuser/mpii/zong.npy ')   # send it over

    # Launch exactly as many processes as the host file was sized for; the
    # former hard-coded 17 only matched when flag1 == 4.
    # NOTE(review): confirm mpi_helloworld.py does not itself assume 17 ranks.
    if os.system("mpiexec -n {} -f mpi_config.txt python3 mpi_helloworld.py".format(pro_sum)):
        print("unsuccess!restart!")
        # Reload the flags so the loop condition reflects the latest checkpoint.
        array = np.load('zong.npy')

        flag = array[-1][0:2]
        flag1 = int(flag[0])
        flag2 = int(flag[1])
    else:
        print("success!")
        break

 

标签:可靠性,ip,line,new,MPI,IP,设计,mpiuser,maxnode
来源: https://www.cnblogs.com/shi-yi/p/16247414.html