Browse Source

基本完善。

liuyuqi-dellpc 5 years ago
parent
commit
7688f93263
9 changed files with 272 additions and 48 deletions
  1. 8 2
      code/data_preview.py
  2. 6 7
      code/instance.py
  3. 1 1
      code/main.py
  4. 129 37
      code/sort_by_disk.py
  5. 10 0
      code/t2.py
  6. 30 0
      code/t4.py
  7. 17 0
      code/t_logging.py
  8. 44 1
      code/test_pandas.py
  9. 27 0
      libs/mlogging.py

+ 8 - 2
code/data_preview.py

@@ -9,6 +9,7 @@
 # 后台做图,不需要GUI需要在头部第一行加入下面两行代码
 # %matplotlib inline   jupyter中加入这一行
 import matplotlib
+
 matplotlib.use('Agg')
 
 # 数据预览
@@ -28,6 +29,7 @@ app_resources = cf.get(section_name, "app_resources")
 instance_deploy = cf.get(section_name, "instance_deploy")
 machine_resources = cf.get(section_name, "machine_resources")
 
+
 def for_df1():
     # 应用app表: 应用id/cpu占用量/内存占用/磁盘占用/P/M/PM等指标
     df1 = pd.read_csv(app_resources, header=None,
@@ -95,7 +97,7 @@ def for_df3():
 def for_df4():
     # 主机和实例表。部署appid1的insterference最多可以部署n个appid2
     df4 = pd.read_csv(app_interference, header=None,
-                     names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
+                      names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
     # 查看数据类型
     # print(df.dtypes)
     print("df数据大小:", df4.shape)
@@ -127,4 +129,8 @@ def for_df4():
     plt.savefig("../submit/fig1.png")
 
 
-for_df4()
+def seeInstance():
+    """Load ../data/instance.csv and print its first rows as a quick preview.
+
+    The file is read with header=None, so the columns are numbered 0..n-1
+    and the first line of the file is treated as data.
+
+    :return: the DataFrame produced by head() on the loaded table
+    """
+    instances = pd.read_csv("../data/instance.csv", header=None, encoding="utf-8", low_memory=False)
+    # head() only builds the preview; outside a notebook it has to be printed
+    # explicitly, otherwise the call has no visible effect.
+    preview = instances.head()
+    print(preview)
+    return preview
+
+seeInstance()

+ 6 - 7
code/instance.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 '''
-按照app对instance分类
+按照app对instance分类,存储一个新的instance.csv文件,后面添加了cpu,mem,disk,P,M,PM等几列
 @Auther :liuyuqi.gov@msn.cn
 @Time :2018/7/6 16:13
 @File :instance.py
@@ -18,13 +18,12 @@ section_name = "data_file_name"
 cf.read(config_path)
 
 app_interference = cf.get(section_name, "app_interference")
-app_resources = cf.get(section_name, "app_resources")
+app = cf.get(section_name, "app")
 instance_deploy = cf.get(section_name, "instance_deploy")
 machine_resources = cf.get(section_name, "machine_resources")
 
 # app
-df1 = pd.read_csv(app_resources, header=None,
-                  names=list(["appid", "cpu", "mem", "disk", "P", "M", "PM"]), encoding="utf-8")
+df1 = pd.read_csv(app, encoding="utf-8")
 
 # instance
 df3 = pd.read_csv(instance_deploy, header=None,
@@ -38,7 +37,7 @@ print(type(group1))
 # plt.savefig("../submit/group1.jpg")
 
 # 找到每个instance消耗的disk
-
+df3["cpu"] = None
 df3["disk"] = None
 df3["mem"] = None
 df3["P"] = None
@@ -47,8 +46,8 @@ df3["PM"] = None
 
 for i in range(0, int(cf.get("table_size", "instance_size"))):
     # df1[df1["appid"] == df3["appid"][i]]["disk"]返回一个pd.Series对象(列表),其实只有一个值,需要选定第一个即可
-    df3["mem"][i] = df1[df1["appid"] == df3["appid"][i]]["mem"].values[0]
-    df3["cpu"][i] = df1[df1["appid"] == df3["appid"][i]]["cpu"].values[0]
+    df3["mem"][i] = df1[df1["appid"] == df3["appid"][i]]["mem_avg"].values[0]
+    df3["cpu"][i] = df1[df1["appid"] == df3["appid"][i]]["cpu_avg"].values[0]
     df3["disk"][i] = df1[df1["appid"] == df3["appid"][i]]["disk"].values[0]
     df3["P"][i] = df1[df1["appid"] == df3["appid"][i]]["P"].values[0]
     df3["M"][i] = df1[df1["appid"] == df3["appid"][i]]["M"].values[0]

+ 1 - 1
code/main.py

@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 # 数据预览
 from configparser import ConfigParser
 import libs.save_conf
-
+from libs.mlogging import Log
 
 # step1: 数据参数初始化
 def getConfig():

+ 129 - 37
code/sort_by_disk.py

@@ -17,6 +17,10 @@ from configparser import ConfigParser
 import time
 import libs.save_result
 
+# A value is trying to be set on a copy of a slice from a DataFrame.
+pd.set_option('mode.chained_assignment', 'raise')
+# sys.path.append("/home/ubuntu/ServerManager")
+
 cf = ConfigParser()
 config_path = "../conf/config.ini"
 section_name = "data_file_name"
@@ -24,17 +28,24 @@ cf.read(config_path)
 
 app_interference = cf.get(section_name, "app_interference")
 app_resources = cf.get(section_name, "app_resources")
-instance_deploy = cf.get(section_name, "instance_deploy")
-machine_resources = cf.get(section_name, "machine_resources")
-app = cf.get(section_name, "app")
 instance = cf.get(section_name, "instance")
 # app
 df1 = pd.read_csv(app_resources, encoding="utf-8")
 
 # instance
-df3 = pd.read_csv(instance_deploy, header=None,
-                  names=list(["instanceid", "appid", "machineid"]))
-
+df3 = pd.read_csv(instance)
+
+print(df3["cpu"][0])
+# print(df3["cpu"].value_counts())
+# print(df3.head())
+df3["cpu"] = df3["cpu"].astype("float")
+df3["disk"] = df3["disk"].astype("float")
+df3["mem"] = df3["mem"].astype("float")
+df3["M"] = df3["M"].astype("float")
+df3["P"] = df3["P"].astype("float")
+df3["PM"] = df3["PM"].astype("float")
+
+df3["isdploy"] = False
 # machine
 # 其实就两类,所以就不需要导入数据了。
 
@@ -42,8 +53,9 @@ df3 = pd.read_csv(instance_deploy, header=None,
 df4 = pd.read_csv(app_interference, header=None,
                   names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
 
-result = pd.DataFrame(columns=list(["instanceid"], "machineid"))
+result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
 
+tem_pre_disk = tem_pre_mem = tem_pre_cpu = tem_pre_P = tem_pre_M = tem_pre_PM = 0
 tem_disk = tem_mem = tem_cpu = tem_P = tem_M = tem_PM = 0
 tmp_stand_cpu1 = 32
 tmp_stand_mem1 = 64
@@ -66,23 +78,42 @@ deploy_list = list()  # 主机j部署的instanceid实例
 
 
 # 各app之间的限制
-def restrictApp(instance, deploy_list):
-    # df4["appid1"]
-    # df4["appid2"]
-
-    return True
+def restrictApps(instance, deploy_list):
+    """Check the interference table before placing one more `instance` on a host.
+
+    For every distinct id k already in deploy_list, df4 is searched for a row
+    with appid1 == k and appid2 == instance. When such a row exists, the host
+    may hold at most max_interference copies of `instance`.
+
+    NOTE(review): the caller passes instance ids while df4 is keyed by
+    appid1/appid2 - confirm that the two id spaces really match.
+
+    :param instance: id of the candidate to place
+    :param deploy_list: list of ids already placed on the current host
+    :return: True when no rule is violated, False otherwise
+    """
+    if not deploy_list:
+        # An empty host cannot violate any pairwise rule.
+        return True
+
+    # Copies of `instance` on the host once the candidate is added. count()
+    # yields 0 for an id that is not deployed yet instead of raising KeyError.
+    new_count = deploy_list.count(instance) + 1
+
+    for k in set(deploy_list):
+        limits = df4.loc[(df4["appid1"] == k) & (df4["appid2"] == instance), "max_interference"]
+        if limits.empty:
+            # No rule for this pair: keep checking the other deployed ids
+            # instead of accepting the placement right away.
+            continue
+        # Compare against a scalar (the strictest matching limit); using the
+        # Series itself in an `if` raises ValueError.
+        if new_count > limits.min():
+            return False
+    # Every applicable rule is satisfied - return an explicit True rather
+    # than falling through with None.
+    return True
 
 
 # 执行部署方案
 def deplay():
+    global j, is_deploy, tem_mem, tem_cpu, tem_disk, tem_P, tem_M, tem_PM, tem_pre_disk, tem_pre_mem, \
+        tem_pre_cpu, tem_pre_P, tem_pre_M, tem_pre_PM, result, df3, deploy_list
+
     print("------------开始部署啦--------------")
     start = time.time()
     row, column = df3.shape
     while row > 0:
-        deployInstance(row, j)
+        deployInstance(row)
         # 整个instace都遍历了,第j主机无法再放入一个,所以添加j+1主机
         row, column = df3.shape
         j = j + 1
+        # j++之后表示新建主机,所以新主机没有部署任何实例,为false
+        is_deploy = False
+        tem_pre_disk = tem_pre_mem = tem_pre_cpu = tem_pre_P = tem_pre_M = tem_pre_PM = 0
+        tem_disk = tem_mem = tem_cpu = tem_P = tem_M = tem_PM = 0
+        df3 = df3[df3["isdeplay"] == False]
+        deploy_list = list()
 
     # 部署完事
     print("------------部署完啦--------------")
@@ -93,37 +124,94 @@ def deplay():
     libs.save_result.save_result(result)
 
 
-def deployInstance(mlength, j):
+def deployInstance(row):
     '''
     根据限制部署实例到主机上
-    :param mlength: 根据剩余的instance数量循环
+    :param row: 根据剩余的instance数量循环
     :param j: 第j台主机
     :return: 暂未定返回值,None
     '''
-    global is_deploy, tem_disk, tem_mem, tem_cpu, tem_P, tem_M, tem_PM
-    for i in range(0, mlength):
-        tem_disk = tem_disk + df3["disk"][i]  # 当前磁盘消耗
-        tem_mem = tem_mem + df3["mem"][i]
-        tem_cpu = tem_cpu + df3["cpu"][i]
-        tem_P = tem_P + df3["P"][i]
-        tem_M = tem_M + df3["M"][i]
-        tem_PM = tem_PM + df3["PM"][i]
+    global is_deploy, tem_mem, tem_cpu, tem_disk, tem_P, tem_M, tem_PM, tem_pre_disk, tem_pre_mem, tem_pre_cpu, tem_pre_P, tem_pre_M, tem_pre_PM, result, j, df3, deploy_list
+    for i in range(0, row):
+        tem_pre_cpu = tem_cpu + df3["cpu"][i]
+        tem_pre_mem = tem_mem + df3["mem"][i]
+        tem_pre_disk = tem_disk + df3["disk"][i]  # 当前磁盘消耗
+        tem_pre_P = tem_P + df3["P"][i]
+        tem_pre_M = tem_M + df3["M"][i]
+        tem_pre_PM = tem_PM + df3["PM"][i]
 
         # if 满足限制表条件,则把当前实例部署到这台主机上。
-        if is_deploy == True:
-            if tem_disk < tmp_stand_disk1:  # 磁盘够
-                if restrictApp(instance=df3["instanceid"], deploy_list=deploy_list):
-                    if tem_mem < tmp_stand_mem1:  # 内存够
-                        if tem_cpu < tmp_stand_cpu1:  # CPU够
-                            if tem_M < tmp_stand_M1:
-                                if tem_P < tmp_stand_P:
-                                    if tem_PM < tmp_stand_PM1:
-                                        result["machine"][i] = "machine_" + i
-        else:
-            # 主机j没有部署实例,则先部署一个
-            result["machine"][i] = "machine_" + i
-            is_deploy = True
-    is_deploy = False
+        if j < 3000:  # 使用小主机
+            if is_deploy == True:
+                if tem_pre_disk < tmp_stand_disk1:  # 磁盘够
+                    if restrictApps(instance=df3["instanceid"][i], deploy_list=deploy_list):
+                        if tem_pre_mem < tmp_stand_mem1:  # 内存够
+                            if tem_pre_cpu < tmp_stand_cpu1:  # CPU够
+                                if tem_pre_M < tmp_stand_M1:
+                                    if tem_pre_P < tmp_stand_P:
+                                        if tem_pre_PM < tmp_stand_PM1:
+                                            # 条件都满足,则把instance放入主机,同时df3表中去掉这个部署好的一行
+                                            result = result.append(pd.DataFrame(
+                                                [{"instanceid": df3["instanceid"][i],
+                                                  "machineid": "machine_" + str(j)}]))
+                                            tem_disk = tem_disk + df3["disk"][i]
+                                            tem_mem = tem_mem + df3["mem"][i]
+                                            tem_cpu = tem_cpu + df3["cpu"][i]
+                                            tem_P = tem_P + df3["P"][i]
+                                            tem_M = tem_M + df3["M"][i]
+                                            tem_PM = tem_PM + df3["PM"][i]
+                                            df3.loc[i, "isdploy"] = True
+                                            deploy_list.append(df3["instanceid"][i])
+
+            else:
+                # 主机j没有部署实例,则先部署一个
+                result = result.append(
+                    pd.DataFrame([{"instanceid": df3["instanceid"][i], "machineid": "machine_" + str(j)}]))
+                tem_disk = tem_disk + df3["disk"][i]
+                tem_mem = tem_mem + df3["mem"][i]
+                tem_cpu = tem_cpu + df3["cpu"][i]
+                tem_P = tem_P + df3["P"][i]
+                tem_M = tem_M + df3["M"][i]
+                tem_PM = tem_PM + df3["PM"][i]
+                df3.loc[i, "isdploy"] = True
+                deploy_list.append(df3["instanceid"][i])
+                # df3["isdploy"][i] = True
+                is_deploy = True
+        else:  # 使用大主机
+            if is_deploy == True:
+                if tem_pre_disk < tmp_stand_disk2:  # 磁盘够
+                    if restrictApps(instance=df3["instanceid"][i], deploy_list=deploy_list):
+                        if tem_pre_mem < tmp_stand_mem2:  # 内存够
+                            if tem_pre_cpu < tmp_stand_cpu2:  # CPU够
+                                if tem_pre_M < tmp_stand_M2:
+                                    if tem_pre_P < tmp_stand_P:
+                                        if tem_pre_PM < tmp_stand_PM2:
+                                            # 条件都满足,则把instance放入主机
+                                            result = result.append(pd.DataFrame(
+                                                [{"instanceid": df3["instanceid"][i],
+                                                  "machineid": "machine_" + str(j)}]))
+                                            tem_disk = tem_disk + df3["disk"][i]
+                                            tem_mem = tem_mem + df3["mem"][i]
+                                            tem_cpu = tem_cpu + df3["cpu"][i]
+                                            tem_P = tem_P + df3["P"][i]
+                                            tem_M = tem_M + df3["M"][i]
+                                            tem_PM = tem_PM + df3["PM"][i]
+                                            df3.loc[i, "isdploy"] = True
+                                            deploy_list.append(df3["instanceid"][i])
+
+            else:
+                # 主机j没有部署实例,则先部署一个
+                result = result.append(
+                    pd.DataFrame([{"instanceid": df3["instanceid"][i], "machineid": "machine_" + str(j)}]))
+                tem_disk = tem_disk + df3["disk"][i]
+                tem_mem = tem_mem + df3["mem"][i]
+                tem_cpu = tem_cpu + df3["cpu"][i]
+                tem_P = tem_P + df3["P"][i]
+                tem_M = tem_M + df3["M"][i]
+                tem_PM = tem_PM + df3["PM"][i]
+                df3.loc[i, "isdploy"] = True
+                deploy_list.append(df3["instanceid"][i])
+                is_deploy = True
 
 
 def plotGroup():  # df3新建一列
@@ -141,3 +229,7 @@ def plotGroup():  # df3新建一列
     # 找到每个instance消耗的disk
 
     # df3["disk"] =
+
+
+# 跑
+deplay()

+ 10 - 0
code/t2.py

@@ -0,0 +1,10 @@
+import pandas as pd
+
+# Sample data: 'A' three times, 'C' and 'D' twice each, 'B' once.
+letter = ['B', 'A', 'C', 'D', 'A', 'C', 'D', 'A']
+
+# value_counts() returns a Series indexed by the distinct values.
+ct = pd.Series(letter).value_counts()
+print(ct.index)
+
+# Walk the (value, count) pairs and, on every pass, also look one count up
+# directly by its label.
+for value, count in ct.items():
+    print(value, count)
+    print("B..", ct["B"])

+ 30 - 0
code/t4.py

@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+@Auther :liuyuqi.gov@msn.cn
+@Time :2018/7/8 22:15
+@File :t4.py
+'''
+import pandas as pd
+
+# Two-row demo frame whose boolean flag column starts out False everywhere.
+df = pd.DataFrame([
+    {"A": "11", "B": "12", "isdploy": False},
+    {"A": "1111", "B": "1211", "isdploy": False},
+])
+
+# Flip the flag of row 0 with a single label-based .loc write (row label,
+# column name) rather than a chained df[col][row] assignment.
+df.loc[0, "isdploy"] = True
+print(df)

+ 17 - 0
code/t_logging.py

@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+@Auther :liuyuqi.gov@msn.cn
+@Time :2018/7/8 14:01
+@File :t_logging.py
+'''
+import logging
+
+import logging.config
+
+# Load the logging configuration from logger.conf; the relative path is
+# resolved against the current working directory.
+logging.config.fileConfig("logger.conf")
+# Fetch the logger named "example01" (presumably declared in logger.conf - verify).
+logger = logging.getLogger("example01")
+
+# Emit one message per level; which of them actually show up depends on the
+# levels configured in logger.conf.
+logger.debug('This is debug message')
+logger.info('This is info message')
+logger.warning('This is warning message')

+ 44 - 1
code/test_pandas.py

@@ -5,6 +5,7 @@
 @Time :2018/7/5 3:08
 @File :test_pandas.py
 '''
+
 import pandas as pd
 
 
@@ -68,4 +69,46 @@ def t6():
     print(df.ix[[1]].values[0][1])  # 第二行第二列的值 121
 
 
-t6()
+def t7():
+    '''
+    Demo: add a row, then add a column, to a small DataFrame.
+    :return: None (both resulting frames are printed)
+    '''
+    df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
+    # Add a row. pd.concat replaces DataFrame.append, which was removed in
+    # pandas 2.0; like append, it keeps the index labels of both frames.
+    df = pd.concat([df, pd.DataFrame([{"A": "1133", "B": "1332"}])])
+    print(df)
+
+    # Add a column: assigning a scalar broadcasts it to every row.
+    df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
+    df["is"] = False
+    print(df)
+
+
+def t8():
+    # Demo: set a single cell of a DataFrame.
+    # Do not assign through chained indexing such as df3["mem"][i] = value;
+    # address the cell with one .loc call (row label, column name), as done
+    # below.
+    df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
+    df["is"] = False
+    # Alternatives that were tried and left disabled:
+    # df["is"][0] = True
+    # df.loc[0][2] = True
+    # df.loc[:, "is"] = True
+    df.loc[0, "is"] = True
+    print(df)
+
+
+t8()
+
+# result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
+
+# df = pd.DataFrame({'a': list(range(100)), 'b': [random.random() for i in range(100)]})
+# index = pd.MultiIndex.from_product([list('abcd'), list(range(25))])
+# df.index = index
+# print(df.head())
+# df.loc[('a', -1), :] = None
+# df.tail()
+#
+# data = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
+# data.index = pd.MultiIndex.from_tuples([('a', 1), ('b', 1), ('c', 1)])
+# data
+# new_df = df.append(data)
+# new_df.tail()

+ 27 - 0
libs/mlogging.py

@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+注意文件名直接写 logging.py 会出错!所以说python文件命名都有规范!
+@Auther :liuyuqi.gov@msn.cn
+@Time :2018/7/8 13:09
+@File :mlogging.py
+'''
+
+import logging
+
+class Log():
+    """Thin convenience wrapper around the standard logging module.
+
+    Creating an instance configures the root logger to write DEBUG and above
+    to myapp.log. The level helpers are static, so both Log.debug("...") and
+    Log().debug("...") work.
+    """
+
+    def __init__(self):
+        # filemode='w' truncates myapp.log each time the configuration is applied.
+        logging.basicConfig(level=logging.DEBUG,
+                            format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                            datefmt='%a, %d %b %Y %H:%M:%S',
+                            filename='myapp.log',
+                            filemode='w')
+
+    # Without @staticmethod (or a self parameter) these helpers raised
+    # TypeError when called on an instance, because the instance was passed
+    # as an unexpected extra positional argument.
+    @staticmethod
+    def debug(msg):
+        """Log msg at DEBUG level on the root logger."""
+        logging.debug(msg)
+
+    @staticmethod
+    def info(msg):
+        """Log msg at INFO level on the root logger."""
+        logging.info(msg)
+
+    @staticmethod
+    def warning(msg):
+        """Log msg at WARNING level on the root logger."""
+        logging.warning(msg)