sort_by_disk.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. Bin-pack instances onto hosts in descending order of disk usage, i.e. keep assigning instances to a host until its disk is used up.
  5. @Author :liuyuqi.gov@msn.cn
  6. @Time :2018/7/7 0:43
  7. @File :sort_by_disk.py
  8. '''
  9. import matplotlib
  10. matplotlib.use('Agg')
  11. import pandas as pd
  12. import matplotlib.pyplot as plt
  13. from configparser import ConfigParser
  14. import time
  15. import libs.save_result
  16. cf = ConfigParser()
  17. config_path = "../conf/config.ini"
  18. section_name = "data_file_name"
  19. cf.read(config_path)
  20. app_interference = cf.get(section_name, "app_interference")
  21. app_resources = cf.get(section_name, "app_resources")
  22. instance_deploy = cf.get(section_name, "instance_deploy")
  23. machine_resources = cf.get(section_name, "machine_resources")
  24. app = cf.get(section_name, "app")
  25. instance = cf.get(section_name, "instance")
  26. # app
  27. df1 = pd.read_csv(app_resources, encoding="utf-8")
  28. # instance
  29. df3 = pd.read_csv(instance_deploy, header=None,
  30. names=list(["instanceid", "appid", "machineid"]))
  31. # machine
  32. # 其实就两类,所以就不需要导入数据了。
  33. # 限制表
  34. df4 = pd.read_csv(app_interference, header=None,
  35. names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
  36. result = pd.DataFrame(columns=list(["instanceid"], "machineid"))
  37. tem_disk = tem_mem = tem_cpu = tem_P = tem_M = tem_PM = 0
  38. tmp_stand_cpu1 = 32
  39. tmp_stand_mem1 = 64
  40. tmp_stand_disk1 = 600
  41. tmp_stand_cpu2 = 92
  42. tmp_stand_mem2 = 288
  43. tmp_stand_disk2 = 600
  44. tmp_stand_P = 7
  45. tmp_stand_M1 = 3
  46. tmp_stand_M2 = 7
  47. tmp_stand_PM1 = 7
  48. tmp_stand_PM2 = 9
  49. machine_count = 0 # 3000小机器,3000大机器。所以在小机器用完换大机器
  50. j = 1 # j表示主机序号,从1-3000,3001到6000
  51. is_deploy = False # 主机j是否部署了instance
  52. deploy_list = list() # 主机j部署的instanceid实例
  53. # 各app之间的限制
  54. def restrictApp(instance, deploy_list):
  55. # df4["appid1"]
  56. # df4["appid2"]
  57. return True
  58. # 执行部署方案
  59. def deplay():
  60. print("------------开始部署啦--------------")
  61. start = time.time()
  62. row, column = df3.shape
  63. while row > 0:
  64. deployInstance(row, j)
  65. # 整个instace都遍历了,第j主机无法再放入一个,所以添加j+1主机
  66. row, column = df3.shape
  67. j = j + 1
  68. # 部署完事
  69. print("------------部署完啦--------------")
  70. end = time.time()
  71. print("总共耗时:", end - start, "秒")
  72. print("总共需要主机数:", j)
  73. print("部署方案前几条示意:", result.head())
  74. libs.save_result.save_result(result)
  75. def deployInstance(mlength, j):
  76. '''
  77. 根据限制部署实例到主机上
  78. :param mlength: 根据剩余的instance数量循环
  79. :param j: 第j台主机
  80. :return: 暂未定返回值,None
  81. '''
  82. global is_deploy, tem_disk, tem_mem, tem_cpu, tem_P, tem_M, tem_PM
  83. for i in range(0, mlength):
  84. tem_disk = tem_disk + df3["disk"][i] # 当前磁盘消耗
  85. tem_mem = tem_mem + df3["mem"][i]
  86. tem_cpu = tem_cpu + df3["cpu"][i]
  87. tem_P = tem_P + df3["P"][i]
  88. tem_M = tem_M + df3["M"][i]
  89. tem_PM = tem_PM + df3["PM"][i]
  90. # if 满足限制表条件,则把当前实例部署到这台主机上。
  91. if is_deploy == True:
  92. if tem_disk < tmp_stand_disk1: # 磁盘够
  93. if restrictApp(instance=df3["instanceid"], deploy_list=deploy_list):
  94. if tem_mem < tmp_stand_mem1: # 内存够
  95. if tem_cpu < tmp_stand_cpu1: # CPU够
  96. if tem_M < tmp_stand_M1:
  97. if tem_P < tmp_stand_P:
  98. if tem_PM < tmp_stand_PM1:
  99. result["machine"][i] = "machine_" + i
  100. else:
  101. # 主机j没有部署实例,则先部署一个
  102. result["machine"][i] = "machine_" + i
  103. is_deploy = True
  104. is_deploy = False
  105. def plotGroup(): # df3新建一列
  106. df3["disk"] = None
  107. for i in range(0, 68219):
  108. df3["disk"][i] = lambda x: x[i], df1["disk"]
  109. # instance分类统计
  110. group1 = df3.groupby("appid").count()
  111. print(type(group1))
  112. print(group1["instanceid"].sort_values(ascending=False))
  113. plt.plot(group1["instanceid"].sort_values(ascending=False))
  114. plt.savefig("../submit/group1.jpg")
  115. # 找到每个instance消耗的disk
  116. # df3["disk"] =