sort_by_disk.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. 按照磁盘占用率从大到小装箱,即按照磁盘先用完为止进行分配实例到主机。
  5. @Author :liuyuqi.gov@msn.cn
  6. @Time :2018/7/7 0:43
  7. @File :sort_by_disk.py
  8. '''
  9. import matplotlib
  10. matplotlib.use('Agg')
  11. import pandas as pd
  12. import matplotlib.pyplot as plt
  13. from configparser import ConfigParser
  14. import time
  15. import libs.save_result
  16. # A value is trying to be set on a copy of a slice from a DataFrame.
  17. pd.set_option('mode.chained_assignment', 'raise')
  18. # sys.path.append("/home/ubuntu/ServerManager")
  19. cf = ConfigParser()
  20. config_path = "../conf/config.ini"
  21. section_name = "data_file_name"
  22. cf.read(config_path)
  23. app_interference = cf.get(section_name, "app_interference")
  24. app_resources = cf.get(section_name, "app_resources")
  25. instance = cf.get(section_name, "instance")
  26. # app
  27. df1 = pd.read_csv(app_resources, encoding="utf-8")
  28. # instance
  29. df3 = pd.read_csv(instance)
  30. # print(df3["cpu"].value_counts())
  31. # print(df3.head())
  32. df3["cpu"] = df3["cpu"].astype("float")
  33. df3["disk"] = df3["disk"].astype("float")
  34. df3["mem"] = df3["mem"].astype("float")
  35. df3["M"] = df3["M"].astype("float")
  36. df3["P"] = df3["P"].astype("float")
  37. df3["PM"] = df3["PM"].astype("float")
  38. df3["isdeploy"] = False
  39. # machine
  40. # 其实就两类,所以就不需要导入数据了。
  41. # 限制表
  42. df4 = pd.read_csv(app_interference, header=None,
  43. names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
  44. result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
  45. tem_pre_disk = tem_pre_mem = tem_pre_cpu = tem_pre_P = tem_pre_M = tem_pre_PM = 0
  46. tem_disk = tem_mem = tem_cpu = tem_P = tem_M = tem_PM = 0
  47. tmp_stand_cpu1 = 32
  48. tmp_stand_mem1 = 64
  49. tmp_stand_disk1 = 600
  50. tmp_stand_cpu2 = 92
  51. tmp_stand_mem2 = 288
  52. tmp_stand_disk2 = 600
  53. tmp_stand_P = 7
  54. tmp_stand_M1 = 3
  55. tmp_stand_M2 = 7
  56. tmp_stand_PM1 = 7
  57. tmp_stand_PM2 = 9
  58. machine_count = 0 # 3000小机器,3000大机器。所以在小机器用完换大机器
  59. j = 1 # j表示主机序号,从1-3000,3001到6000
  60. is_deploy = False # 主机j是否部署了instance
  61. deploy_list = list() # 主机j部署的instanceid实例
  62. # 各app之间的限制
  63. def restrictApps(instance, deploy_list):
  64. len_list = len(deploy_list)
  65. if len_list == 0:
  66. return True
  67. else:
  68. ct = pd.Series(deploy_list).value_counts()
  69. for k, v in ct.items():
  70. tmp = df4.loc[(df4["appid1"] == k) & (df4["appid2"] == instance)]
  71. row, col = tmp.shape
  72. if row > 0:
  73. if ct[instance] + 1 > tmp["max_interference"]:
  74. return False
  75. else:
  76. #在限制表中找不到限制条件
  77. return True
  78. # 执行部署方案
  79. def deploy():
  80. global j, is_deploy, tem_mem, tem_cpu, tem_disk, tem_P, tem_M, tem_PM, tem_pre_disk, tem_pre_mem, \
  81. tem_pre_cpu, tem_pre_P, tem_pre_M, tem_pre_PM, result, df3, deploy_list
  82. print("------------开始部署啦--------------")
  83. start = time.time()
  84. row, column = df3.shape
  85. while row > 0:
  86. deployInstance(row)
  87. # 整个instace都遍历了,第j主机无法再放入一个,所以添加j+1主机
  88. row, column = df3.shape
  89. j = j + 1
  90. # j++之后表示新建主机,所以新主机没有部署任何实例,为false
  91. is_deploy = False
  92. tem_pre_disk = tem_pre_mem = tem_pre_cpu = tem_pre_P = tem_pre_M = tem_pre_PM = 0
  93. tem_disk = tem_mem = tem_cpu = tem_P = tem_M = tem_PM = 0
  94. df3 = df3[df3["isdeploy"] == False]
  95. deploy_list = list()
  96. # 部署完事
  97. print("------------部署完啦--------------")
  98. end = time.time()
  99. print("总共耗时:", end - start, "秒")
  100. print("总共需要主机数:", j)
  101. print("部署方案前几条示意:", result.head())
  102. libs.save_result.save_result(result)
  103. def deployInstance(row):
  104. '''
  105. 根据限制部署实例到主机上
  106. :param row: 根据剩余的instance数量循环
  107. :param j: 第j台主机
  108. :return: 暂未定返回值,None
  109. '''
  110. global is_deploy, tem_mem, tem_cpu, tem_disk, tem_P, tem_M, tem_PM, tem_pre_disk, tem_pre_mem, tem_pre_cpu, tem_pre_P, tem_pre_M, tem_pre_PM, result, j, df3, deploy_list
  111. for i in range(0, row):
  112. tem_pre_cpu = tem_cpu + df3["cpu"][i]
  113. tem_pre_mem = tem_mem + df3["mem"][i]
  114. tem_pre_disk = tem_disk + df3["disk"][i] # 当前磁盘消耗
  115. tem_pre_P = tem_P + df3["P"][i]
  116. tem_pre_M = tem_M + df3["M"][i]
  117. tem_pre_PM = tem_PM + df3["PM"][i]
  118. # if 满足限制表条件,则把当前实例部署到这台主机上。
  119. if j < 3000: # 使用小主机
  120. if is_deploy == True:
  121. if tem_pre_disk < tmp_stand_disk1: # 磁盘够
  122. if restrictApps(instance=df3["instanceid"][i], deploy_list=deploy_list):
  123. if tem_pre_mem < tmp_stand_mem1: # 内存够
  124. if tem_pre_cpu < tmp_stand_cpu1: # CPU够
  125. if tem_pre_M < tmp_stand_M1:
  126. if tem_pre_P < tmp_stand_P:
  127. if tem_pre_PM < tmp_stand_PM1:
  128. # 条件都满足,则把instance放入主机,同时df3表中去掉这个部署好的一行
  129. result = result.append(pd.DataFrame(
  130. [{"instanceid": df3["instanceid"][i],
  131. "machineid": "machine_" + str(j)}]))
  132. tem_disk = tem_disk + df3["disk"][i]
  133. tem_mem = tem_mem + df3["mem"][i]
  134. tem_cpu = tem_cpu + df3["cpu"][i]
  135. tem_P = tem_P + df3["P"][i]
  136. tem_M = tem_M + df3["M"][i]
  137. tem_PM = tem_PM + df3["PM"][i]
  138. df3.loc[i, "isdeploy"] = True
  139. deploy_list.append(df3["instanceid"][i])
  140. else:
  141. # 主机j没有部署实例,则先部署一个
  142. result = result.append(
  143. pd.DataFrame([{"instanceid": df3["instanceid"][i], "machineid": "machine_" + str(j)}]))
  144. tem_disk = tem_disk + df3["disk"][i]
  145. tem_mem = tem_mem + df3["mem"][i]
  146. tem_cpu = tem_cpu + df3["cpu"][i]
  147. tem_P = tem_P + df3["P"][i]
  148. tem_M = tem_M + df3["M"][i]
  149. tem_PM = tem_PM + df3["PM"][i]
  150. df3.loc[i, "isdeploy"] = True
  151. deploy_list.append(df3["instanceid"][i])
  152. # df3["isdeploy"][i] = True
  153. is_deploy = True
  154. else: # 使用大主机
  155. if is_deploy == True:
  156. if tem_pre_disk < tmp_stand_disk2: # 磁盘够
  157. if restrictApps(instance=df3["instanceid"][i], deploy_list=deploy_list):
  158. if tem_pre_mem < tmp_stand_mem2: # 内存够
  159. if tem_pre_cpu < tmp_stand_cpu2: # CPU够
  160. if tem_pre_M < tmp_stand_M2:
  161. if tem_pre_P < tmp_stand_P:
  162. if tem_pre_PM < tmp_stand_PM2:
  163. # 条件都满足,则把instance放入主机
  164. result = result.append(pd.DataFrame(
  165. [{"instanceid": df3["instanceid"][i],
  166. "machineid": "machine_" + str(j)}]))
  167. tem_disk = tem_disk + df3["disk"][i]
  168. tem_mem = tem_mem + df3["mem"][i]
  169. tem_cpu = tem_cpu + df3["cpu"][i]
  170. tem_P = tem_P + df3["P"][i]
  171. tem_M = tem_M + df3["M"][i]
  172. tem_PM = tem_PM + df3["PM"][i]
  173. df3.loc[i, "isdeploy"] = True
  174. deploy_list.append(df3["instanceid"][i])
  175. else:
  176. # 主机j没有部署实例,则先部署一个
  177. result = result.append(
  178. pd.DataFrame([{"instanceid": df3["instanceid"][i], "machineid": "machine_" + str(j)}]))
  179. tem_disk = tem_disk + df3["disk"][i]
  180. tem_mem = tem_mem + df3["mem"][i]
  181. tem_cpu = tem_cpu + df3["cpu"][i]
  182. tem_P = tem_P + df3["P"][i]
  183. tem_M = tem_M + df3["M"][i]
  184. tem_PM = tem_PM + df3["PM"][i]
  185. df3.loc[i, "isdeploy"] = True
  186. deploy_list.append(df3["instanceid"][i])
  187. is_deploy = True
  188. def plotGroup(): # df3新建一列
  189. df3["disk"] = None
  190. for i in range(0, 68219):
  191. df3["disk"][i] = lambda x: x[i], df1["disk"]
  192. # instance分类统计
  193. group1 = df3.groupby("appid").count()
  194. print(type(group1))
  195. print(group1["instanceid"].sort_values(ascending=False))
  196. plt.plot(group1["instanceid"].sort_values(ascending=False))
  197. plt.savefig("../submit/group1.jpg")
  198. # Find the disk consumed by each instance
  199. # df3["disk"] =
  200. # Run the deployment
  201. deploy()