liuyuqi-dellpc 5 years ago
parent
commit
f4ac040abd
8 changed files with 291 additions and 162 deletions
  1. 112 66
      code/main.py
  2. 44 49
      java/me/yoqi/servermanager/Main.java
  3. 33 6
      test/t.py
  4. 0 17
      test/t_logging.py
  5. 57 0
      test/t_numpy.py
  6. 30 2
      test/test_pandas.py
  7. 0 19
      test/time.py
  8. 15 3
      utils/save_result.py

+ 112 - 66
code/main.py

@@ -11,7 +11,8 @@ from configparser import ConfigParser
 
 import pandas as pd
 
-import libs.save_conf
+import utils.save_conf
+import utils.save_result
 
 
 class Scheduling():
@@ -26,88 +27,93 @@ class Scheduling():
     EXEC_LIMIT = 100000
 
     # 静态数据 n:app数 N:inst数 m:machine数 k:资源种类
-    n = N = m = 0
-    k = 200
+    num_app = num_inst = num_mac = 0
+    num_k = 200
     cpuIter = list()
     appIndex = {}
     machineIndex = {}
     inst2AppIndex = {}
-    appIndexference = {}
 
-    apps = list()
-    machines = list()
-    appResources = list()
-    machineResources = list()
+    # apps = list()
+    # machines = list()
+    appResources = list()  # app表,
+    machineResources = list()  # machine表
+    instanceDeploy = list()  # instance表
+    appInterference = list()  # app_interfence冲突表
 
     # 动态数据
-    inst2Machine = {}
+    inst2MachineRemine = {}
     machineResourcesUsed = list()
-    machineHasApp = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
+    machineHasApp = list()  # 6000 [{}, {},{6004=1, 9126=1, 1598=1}, {}, {}, {},
+    inst2Machine = list()
+    result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
 
     def __init__(self, **kw):
+        '''
+        Initialise the instance from keyword arguments.
+
+        Each keyword is stored with setattr as an instance attribute of the
+        same name, so it shadows a class-level attribute with that name.
+        :param kw: attribute name -> value pairs
+        '''
         for k, v in kw.items():
             setattr(self, k, v)
 
     def loadData(self):
-        '''   n
-               app_resources.csv
-               m
-               machine_resources.csv
-               N
-               instance_deploy.csv
-               iterference_cnt
-               app_interference.csv
-            judge framework
-        '''
+        '''
+        Read the four CSV inputs into DataFrames and normalise their ids.
+
+        The file paths come from getConfig(). Id columns are expected to hold
+        strings of the form "<prefix>_<number>"; the part after the underscore
+        is kept and converted with pd.to_numeric. Row counts are stored in
+        num_app, num_mac and num_inst.
+        '''
+        # NOTE(review): cpuIter is declared as a class-level list, so these
+        # appends are shared by every instance that has not rebound it.
+        for i in range(self.T):
+            self.cpuIter.append(i)
         app_interference, app_resources, instance_deploy, machine_resources = self.getConfig()
         # 1.app_resources 9338*201
         self.appResources = pd.read_csv(app_resources, header=None,
                                         names=list(["appid", "cpu", "mem", "disk", "P", "M", "PM"]), encoding="utf-8")
-
         tmp_cpu = self.appResources["cpu"].str.split('|', expand=True).astype('float')
         tmp_mem = self.appResources["mem"].str.split('|', expand=True).astype('float')
-
         for i in range(self.T):
-            # 新添加98列CPU限制
-            # self.appResources["cpu_" + str(i)] = None
-            # self.appResources["mem_" + str(i)] = None
-            # 赋值
             self.appResources["cpu_" + str(i)] = tmp_cpu[i]
             self.appResources["mem_" + str(i)] = tmp_mem[i]
-
         # drop the raw cpu/mem columns now that they are expanded per time slot
         self.appResources.pop("cpu")
         self.appResources.pop("mem")
-        self.n, col = self.appResources.shape  # 9338*201 201列:appid,cpu_1,cpu_2,...mem_1,men_2....,P,M,PM
+        self.num_app, col = self.appResources.shape  # original note: 9338*201 (appid, disk, P, M, PM plus the cpu_i / mem_i columns)
+        self.appResources["appid"] = pd.to_numeric(self.appResources["appid"].str.split("_", expand=True)[1].values)
 
         # 2.machine_resources 6000*201
         self.machineResources = pd.read_csv(machine_resources, header=None, names=list(
             ["machineid", "cpu", "mem", "disk", "P", "M", "PM"]), encoding="utf-8")
-
+        self.num_mac, col = self.machineResources.shape
         for i in range(self.T):
             self.machineResources["cpu_" + str(i)] = self.machineResources["cpu"]
             self.machineResources["mem_" + str(i)] = self.machineResources["mem"]
         self.machineResources.pop("cpu")
         self.machineResources.pop("mem")
-
+        self.machineResources["machineid"] = pd.to_numeric(
+            self.machineResources["machineid"].str.split("_", expand=True)[1].values)
         self.machineResourcesUsed = self.machineResources.copy()
-
         for i in range(200):
             self.machineResourcesUsed.iloc[:, i + 1] = 0
+        # one empty dict per machine: [{}, {}, ...]
+        # NOTE(review): machineHasApp is also a class-level list, so these appends are shared across instances.
+        for i in range(self.num_mac):
+            self.machineHasApp.append({})
 
         # 3.instance_deploy
-        self.inst2Machine = pd.read_csv(instance_deploy, header=None,
-                                        names=list(["instanceid", "appid", "machineid"]), encoding="utf-8")
+        self.instanceDeploy = pd.read_csv(instance_deploy, header=None,
+                                          names=list(["instanceid", "appid", "machineid"]), encoding="utf-8")
         # add a column flagging whether the instance has been deployed
-        self.inst2Machine["isdeploy"] = False
-        # # 4.app_interference 冲突表
-        self.appIndexference = pd.read_csv(app_interference, header=None,
+        self.instanceDeploy["isdeploy"] = False
+        self.instanceDeploy["instanceid"] = pd.to_numeric(
+            self.instanceDeploy["instanceid"].str.split("_", expand=True)[1].values)
+        self.instanceDeploy["appid"] = pd.to_numeric(self.instanceDeploy["appid"].str.split("_", expand=True)[1].values)
+        self.instanceDeploy["machineid"] = pd.to_numeric(
+            self.instanceDeploy["machineid"].str.split("_", expand=True)[1].values)
+
+        # 4.app_interference: table of conflicts between pairs of apps
+        self.appInterference = pd.read_csv(app_interference, header=None,
                                            names=list(["appid1", "appid2", "max_interference"]), encoding="utf-8")
+        self.appInterference["appid1"] = pd.to_numeric(
+            self.appInterference["appid1"].str.split("_", expand=True)[1].values)
+        self.appInterference["appid2"] = pd.to_numeric(
+            self.appInterference["appid2"].str.split("_", expand=True)[1].values)
 
         # original note: "sort instances by disk usage"; the lines below only record row counts
-        self.n, col = self.appResources.shape
-        self.N, col = self.machineResources.shape
-        self.m, col = self.inst2Machine.shape
+        self.num_app, col = self.appResources.shape
+        self.num_inst, col = self.instanceDeploy.shape
 
     def getConfig(self):
         '''
@@ -115,7 +121,8 @@ class Scheduling():
         :return:
         '''
         # 生成配置文件
-        self.init_conf()
+        # self.init_conf()
+
         # 读取配置文件
         cf = ConfigParser()
         config_path = "../conf/config.ini"
@@ -133,7 +140,7 @@ class Scheduling():
         初始化配置文件
         :retur
         '''
-        libs.save_conf.write()
+        utils.save_conf.write()
 
     def sort_dynamic(self):
         print("ss")
@@ -143,23 +150,35 @@ class Scheduling():
         先将instance从部署的主机中删除,删除一行,释放资源
         :return:
         '''
+        if instanceid not in self.inst2Machine["instance"]:
+            return
+        appid = self.inst2Machine[self.inst2Machine["instanceid"] == instanceid]["appid"].values[0]
+
+        # 更新machineResourcesUsed
+        for i in range(self.num_k):
+            machineResourcesUsed[fromMachine][j] -= appResources[appIt][i]
+        fromMachine = self.inst2AppIndex
         self.inst2Machine.pop(instanceid)
 
-    def toMachine(self, instanceid, machineid, doCheck=True):
+    def toMachine(self, instanceid, machineid, doCheck=False):
         '''
         检查互斥条件,然后把instance放入主机
+        :rtype: object
         :param instanceid: 实例id
         :param machineid: 主机id
         :param doCheck: 是否检测资源限制
         :return: True和False
         '''
-        appid = self.inst2Machine[self.inst2Machine["instanceid"] == instanceid]["appid"].values[0]
+        # instanceid所属的appid
+        appid = self.instanceDeploy[self.instanceDeploy["instanceid"] == instanceid]["appid"].values[0]
+        # machineid从1开始,而index从0开始
+        hasApp = self.machineHasApp[int(machineid - 1)]
         if doCheck:
             # 检查互斥
 
 
             # 检查资源限制
-            for i in range(self.k):
+            for i in range(self.num_k):
                 if (
                                 self.machineResourcesUsed[self.machineResourcesUsed["machineid"] == machineid].iloc[:,
                                 i + 1].values[0] +
@@ -175,56 +194,83 @@ class Scheduling():
                           i + 1].values[0])
                     # 如果不符合则 return False
                     return False
-        # instance占用资源
-        for i in range(self.k):
-            self.machineResourcesUsed[self.machineResourcesUsed["machineid"] == machineid].iloc[:, i + 1].values[0] += \
+        # 将inst放入新的machine,占用资源
+        self.inst2Machine.append([{"instanceid": instanceid, "machineid": machineid}])
+        if appid not in hasApp:
+            hasApp.update({appid: 1})
+        else:
+            hasApp.update({appid: hasApp.get(appid) + 1})
+        for i in range(self.num_k):
+            self.machineResourcesUsed[self.machineResourcesUsed["machineid"] == machineid].iloc[:, i + 1].values[
+                0] += \
                 self.appResources[self.appResources["appid"] == appid].iloc[:, i + 1].values[0]
         return True
 
-    def run(self):
+    def run(self, start):
         '''
-
+        Run the deployment: replay the initial placements, then place the rest.
+
+        :param start: time.time() value that the elapsed-time print is measured from
         :return:
         '''
         # instances that already have a machine assigned
-        deployed_Instance = self.inst2Machine.loc[pd.isna(self.inst2Machine["machineid"]) == False]
+        deployed_Instance = self.instanceDeploy.loc[pd.isna(self.instanceDeploy["machineid"]) == False]
         count_deployed_Instance, col = deployed_Instance.shape
-        deployed_Instance = deployed_Instance.reset_index(drop=True)
+        deployed_Instance.reset_index(drop=True, inplace=True)
 
         # put the already-deployed instances on their machines and take up their resources (author's note: much slower than the Java version)
         for i in range(count_deployed_Instance):
-            print(i)
             instanceid = deployed_Instance["instanceid"][i]
             machineid = deployed_Instance["machineid"][i]
             self.toMachine(instanceid, machineid, doCheck=False)
+            print("初始部署第", i, "个,持续耗时", time.time() - start, "秒")
 
-        # 先对已经部署的主机列表按照资源消耗进行排序
+        # sort the instances by disk usage as well, descending
+        # NOTE(review): loadData builds instanceDeploy with the columns instanceid/appid/machineid/isdeploy only; confirm a "disk" column exists before this sort.
+        self.instanceDeploy = self.instanceDeploy.sort_values(ascending=False, by="disk")
 
-        # 先使用大主机,磁盘优先计算限制条件
-        row1, col = self.inst2Machine.shape
+        # then place instances with first fit: a placed instance leaves the queue and a used-up machine is dropped; large machines first, disk is the first constraint checked
+        # NOTE(review): row1 is never reassigned inside the loop (the new count goes to `row`), and j is incremented below without being initialised in this method.
+        row1, col = self.instanceDeploy.shape
         while row1 > 0:
-            # 每部署一次,消耗一个主机
-            for row2 in self.inst2Machine.itertuples():
-                if row2.
-                self.toMachine(row2)
+            # author's intent: sort the machines by remaining disk, descending (the frame sorted here is machineResourcesUsed)
+            self.machineResourcesUsed = self.machineResourcesUsed.sort_values(ascending=False, by="disk")
 
+            # each pass consumes one machine
+            self.deployInstance()
             # keep only the instances that are not deployed yet
-            self.inst2Machine = self.inst2Machine[self.inst2Machine["isdeploy"] == False]
-            row, col = self.inst2Machine.shape
-            self.inst2Machine = self.inst2Machine.reset_index(drop=True)
+            self.instanceDeploy = self.instanceDeploy[self.instanceDeploy["isdeploy"] == False]
+            row, col = self.instanceDeploy.shape
+            self.instanceDeploy.reset_index(drop=True, inplace=True)
             j = j + 1
 
             print("已经部署:", 68219 - row, "剩余部署Instance数据:", row)
             print("已经消耗Machine主机数据:", j)
-        print("部署方案前几条示意:", self.machineHasApp.head())
-        libs.save_result.save_result(self.machineHasApp)
+        print("部署方案前几条示意:", self.result.head())
+        utils.save_result.save_result(self.result)
+
+    def dcmp(self, x):
+        '''
+        Map x to its sign: -1, 0 or 1.
+
+        Values whose magnitude is below 1e-9 count as zero, so tiny
+        floating-point differences compare as equal.
+        :param x: number to classify
+        :return: 0 if abs(x) < 1e-9, 1 if x is positive, otherwise -1
+        '''
+        if abs(x) < 1e-9:
+            return 0
+        elif x > 0:
+            return 1
+        else:
+            return -1
 
     def deployInstance(self):
         '''
         Deployment logic: try to place every row of instanceDeploy.
         :return:
         '''
-        pass
+        # machine: self.machineResourcesUsed["machineid"][0]
+        # NOTE(review): toMachine receives the whole row tuple as instanceid and "" as machineid, while toMachine computes int(machineid - 1); confirm the intended arguments.
+        # NOTE(review): machineHasApp and j are read below without being assigned in this method (self.machineHasApp / self.result may be what was meant).
+        for row in self.instanceDeploy.itertuples():
+            i = row.Index
+            # try to deploy this row's instance on a new machine; if it was already deployed initially this counts as a migration and the old machine's resources are released
+            if self.toMachine(row, machineid="", doCheck=True):
+                machineHasApp = machineHasApp.append(pd.DataFrame(
+                    [{"instanceid": row.instanceid,
+                      "machineid": "machine_" + str(j)}]))
 
 
 if __name__ == '__main__':
@@ -234,7 +280,7 @@ if __name__ == '__main__':
     # 加载数据
     scheduling.loadData()
     # 开始调度
-    scheduling.run()
+    scheduling.run(start)
     # 部署完事
     print("------------部署完啦--------------")
     end = time.time()

+ 44 - 49
java/me/yoqi/servermanager/Main.java

@@ -26,31 +26,31 @@ public class Main {
 	public static final int EXEC_LIMIT = 100000;
 
 	// 静态数据
-	private int n; // app数
-	private int N; // inst数
-	private int m; // machine数
-	private int k; // 资源种类
-	private List<Integer> cpuIter; // T个时刻的cpu资源
-	private Map<String, Integer> appIndex;
-	private Map<String, Integer> machineIndex;
-	private String[] apps;
-	private String[] machines;
-	private Map<String, Integer> inst2AppIndex;
+	private int num_app; // app数 9338
+	private int num_inst; // inst数 68219
+	private int num_mac; // machine数 6000
+	private int num_k; // 资源种类 200
+	private List<Integer> cpuIter; //[0, 1, 2, ... 97]
+	private Map<String, Integer> appIndex; //app_1,app_2字符串用0,1等数字替换,{app_1=0, app_2=1,...,app_4=3}
+	private Map<String, Integer> machineIndex;//{machine_1=0, machine_2=1,
+	private String[] apps; //[app_1, app_2, app_3, app_4, 
+	private String[] machines;//[machine_1, machine_2, machine_3, machine_4,
+	private Map<String, Integer> inst2AppIndex;//{inst_157=49}
 
-	private double[][] appResources;// app
-	private double[][] machineResources;// 主机
-	private Map<Integer, Integer>[] appInterference;// 限制条件
+	private double[][] appResources;// app 9338*200
+	private double[][] machineResources;// 主机 6000*200
+	private Map<Integer, Integer>[] appInterference;//9338 限制条件[{}, {}, {3517=2}, {}, {5600=1, 6747=2, 7707=2, 4830=2},
 
 	// 动态数据
-	private Map<String, Integer> inst2Machine;// 部署
-	private double[][] machineResourcesUsed;
-	private Map<Integer, Integer>[] machineHasApp;
+	private Map<String, Integer> inst2Machine;// 部署 {inst_33717=5456, inst_33713=3427, inst_33716=3983,
+	private Map<String, Integer> inst2MachineRemine;
+	private double[][] machineResourcesUsed;//6000*200
+	private Map<Integer, Integer>[] machineHasApp;//6000 [{}, {},{6004=1, 9126=1, 1598=1}, {}, {}, {},
 
 	/**
 	 * 先对disk排序,然后first fit
 	 */
 	private void run() {
-		// 未部署
 		
 		while(inst2Machine.size()>0) {
 			
@@ -69,19 +69,13 @@ public class Main {
 
 	// 读取数据
 	protected void init(BufferedReader bufferedReader) throws IOException {
-		/*
-		 * Preprocessing: cat *.csv to one file as: n app_resources.csv m
-		 * machine_resources.csv N instance_deploy.csv iterference_cnt
-		 * app_interference.csv judge framework
-		 */
-		/** cpuIter */
-		cpuIter = new ArrayList<Integer>();// 1,2,3....98
+		cpuIter = new ArrayList<Integer>();
 		for (int i = 0; i < T; i++)
 			cpuIter.add(i);
 		/** Read app_resources */
-		n = Integer.parseInt(bufferedReader.readLine());// 9338
-		apps = new String[n];
-		for (int i = 0; i < n; i++) {// 循环app表每一行
+		num_app = Integer.parseInt(bufferedReader.readLine());// 9338
+		apps = new String[num_app];
+		for (int i = 0; i < num_app; i++) {// 循环app表每一行
 			// appId,resources
 			String line = bufferedReader.readLine();
 			String[] parts = line.split(",", -1);
@@ -93,29 +87,28 @@ public class Main {
 			for (int j = 3; j < parts.length; j++) // disk/P/M/PM
 				resources.add(Double.parseDouble(parts[j]));
 			if (i == 0) {
-				k = resources.size();
+				num_k = resources.size();
 				appIndex = new HashMap<String, Integer>();
-				appResources = new double[n][k];
+				appResources = new double[num_app][num_k];
 			}
-			if (k != resources.size())
+			if (num_k != resources.size())
 				throw new IOException("[DEBUG 2]Invaild problem");
 			if (appIndex.containsKey(parts[0]))
 				throw new IOException("[DEBUG 3]Invaild problem");
-			appIndex.put(parts[0], i);// {app_5269=5268, app_5267=5266,
-										// app_6598=6597}
+			appIndex.put(parts[0], i);
 			apps[i] = parts[0];// [app_1, app_2, app_3, app_4]
-			for (int j = 0; j < k; j++)
+			for (int j = 0; j < num_k; j++)
 				appResources[i][j] = resources.get(j);
 		}
 		/** Read machine_resources */
-		m = Integer.parseInt(bufferedReader.readLine());// 6000
-		machineResources = new double[m][k];
-		machineResourcesUsed = new double[m][k];
+		num_mac = Integer.parseInt(bufferedReader.readLine());// 6000
+		machineResources = new double[num_mac][num_k];
+		machineResourcesUsed = new double[num_mac][num_k];
 		machineIndex = new HashMap<String, Integer>();// {machine_3791=3790,
 														// machine_3792=3791}
-		machineHasApp = new Map[m];
-		machines = new String[m];
-		for (int i = 0; i < m; i++) {
+		machineHasApp = new Map[num_mac];
+		machines = new String[num_mac];
+		for (int i = 0; i < num_mac; i++) {
 			// machineId,resources
 			String line = bufferedReader.readLine();
 			String[] parts = line.split(",", -1);
@@ -132,13 +125,14 @@ public class Main {
 			}
 			for (int j = 3; j < parts.length; j++)
 				machineResources[i][2 * T + j - 3] = Double.parseDouble(parts[j]);
-			for (int j = 0; j < k; j++)
+			for (int j = 0; j < num_k; j++)
 				machineResourcesUsed[i][j] = 0.;
+			
 		}
 		/** Read app_interference */
 		int icnt = Integer.parseInt(bufferedReader.readLine());// 35242
-		appInterference = new Map[n];
-		for (int i = 0; i < n; i++)
+		appInterference = new Map[num_app];
+		for (int i = 0; i < num_app; i++)
 			appInterference[i] = new HashMap<Integer, Integer>();
 		for (int i = 0; i < icnt; i++) {
 			String line = bufferedReader.readLine();
@@ -156,10 +150,10 @@ public class Main {
 			inter.put(app2, limit);
 		}
 		/** Read instance_deploy */
-		N = Integer.parseInt(bufferedReader.readLine());// 68219
+		num_inst = Integer.parseInt(bufferedReader.readLine());// 68219
 		inst2AppIndex = new HashMap<String, Integer>();// 68219*2 {inst_33717=8766, inst_33718=2956}
 		inst2Machine = new HashMap<String, Integer>();// {inst_33717=5456, inst_33713=3427}
-		for (int i = 0; i < N; i++) {
+		for (int i = 0; i < num_inst; i++) {
 			String line = bufferedReader.readLine();
 			String[] parts = line.split(",", -1);
 			if (inst2AppIndex.containsKey(parts[0]))
@@ -170,9 +164,10 @@ public class Main {
 			if (!"".equals(parts[2])) {
 				if (!machineIndex.containsKey(parts[2]))
 					throw new IOException("[DEBUG 7]Invaild problem");
-				toMachine(parts[0], machineIndex.get(parts[2]), true);
+				toMachine(parts[0], machineIndex.get(parts[2]), false);
 			}
 		}
+		System.out.println("finisl");
 	}
 
 	private String toMachine(String inst, int machineIt) {
@@ -207,7 +202,7 @@ public class Main {
 				}
 			}
 			// 检查资源限制,初始数据这里没有冲突
-			for (int i = 0; i < k; i++)
+			for (int i = 0; i < num_k; i++)
 				if (dcmp(machineResourcesUsed[machineIt][i] + appResources[appIt][i]
 						- machineResources[machineIt][i]) > 0) {
 					String res = "Resource Limit: inst: " + inst + ", " + "machine: " + machines[machineIt] + ", app: "
@@ -223,7 +218,7 @@ public class Main {
 		if (!hasApp.containsKey(appIt))
 			hasApp.put(appIt, 0);
 		hasApp.put(appIt, hasApp.get(appIt) + 1);
-		for (int i = 0; i < k; i++)
+		for (int i = 0; i < num_k; i++)
 			machineResourcesUsed[machineIt][i] += appResources[appIt][i];
 
 		return "success";
@@ -245,7 +240,7 @@ public class Main {
 		if (fromHasApp.get(appIt) <= 0)
 			fromHasApp.remove(appIt);
 		// 更新machineResourcesUsed
-		for (int i = 0; i < k; i++)
+		for (int i = 0; i < num_k; i++)
 			machineResourcesUsed[fromMachine][i] -= appResources[appIt][i];
 		// 更新inst2Machine
 		inst2Machine.remove(inst);
@@ -297,7 +292,7 @@ public class Main {
 		// 评测
 		Main evaluator = new Main();
 		evaluator.init(new BufferedReader(new InputStreamReader(problem, Charsets.UTF_8)));
-		evaluator.run();
+//		evaluator.run();
 
 		long endTime = System.currentTimeMillis(); // 获取结束时间
 		System.out.println("程序运行时间:" + (endTime - startTime) / 1000 + "s"); // 输出程序运行时间

+ 33 - 6
test/t.py

@@ -5,13 +5,40 @@
 @Time :2018/7/8 3:16
 @File :t.py
 '''
+import pandas as pd
 
-row = 100
 
-while row > 0:
-    print(row)
-    row = row - 20
+def t1():
+    '''Fill a list with 0..9, one append at a time, and print it.'''
+    # list
+    cpuIter = list()
+    for i in range(10):
+        cpuIter.append(i)
+    print(cpuIter)
 
-import libs.save_result
 
-libs.save_result.save_result(list())
+def t2():
+    '''
+    Demo of a list of dicts: build five empty dicts, replace one, append one.
+
+    Prints True first, because dict.get returns None for the missing key "C",
+    then prints the whole list.
+    '''
+    # dict
+    res = list()
+    n = 5
+    for i in range(n):
+        res.append({})
+    res[1] = {"A": 2, "B": 3}
+    res.append({"appid": 0})
+    print(res[1].get("C") == None)
+    print(res)
+
+
+def t3():
+    '''
+    Demo of growing a DataFrame row by row: append the same row five times
+    to an empty instanceid/machineid frame and print the result.
+    '''
+    # pandas dataframe
+    # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the replacement on newer versions.
+    result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
+    for i in range(5):
+        # result = result.append(pd.DataFrame(
+        #     [{"instanceid": "2",
+        #       "machineid": "machine_" + str(2)}]))
+        result = result.append(
+            [{"instanceid": "2",
+              "machineid": "machine_" + str(2)}])
+    print(result)
+
+
+t2()

+ 0 - 17
test/t_logging.py

@@ -1,17 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-'''
-@Auther :liuyuqi.gov@msn.cn
-@Time :2018/7/8 14:01
-@File :t_logging.py
-'''
-import logging
-
-import logging.config
-
-logging.config.fileConfig("logger.conf")
-logger = logging.getLogger("example01")
-
-logger.debug('This is debug message')
-logger.info('This is info message')
-logger.warning('This is warning message')

+ 57 - 0
test/t_numpy.py

@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+注意文件名不要和Numpy等导入的包一样,否则导入包出错!
+@Auther :liuyuqi.gov@msn.cn
+@Time :7/20/2018 7:00 AM
+@File :t_numpy.py
+'''
+
+import numpy as np
+
+x = np.array([[1, 2, 3], [9, 8, 7], [6, 5, 4]])
+
+
+def t1():
+    '''
+    Define an ndarray and print its basic properties.
+    :return:
+    '''
+    x = np.array([[1, 2, 3], [9, 8, 7], [6, 5, 4]])
+    print(x)
+    print(x.shape)  # number of rows and columns
+    print(type(x))  # type of the object
+    print(x.flags)  # information about the array's memory layout
+    print(x.size)  # number of elements
+    print(x.ndim)  # number of dimensions
+
+
+def t2():
+    '''
+    Basic operations on the module-level array x: transpose and flat slicing.
+    :return:
+    '''
+    # transpose
+    print(x.T)
+
+    # slicing
+    # view the array as one-dimensional and take part of the data (elements 2..5)
+    print(x.flat[2:6])
+
+
+def t3():
+    '''
+    Placeholder for calculation demos (sum / mean); the body is only this docstring.
+    :return:
+    '''
+
+
+def t4():
+    '''
+    1e-9 is scientific notation for 10^(-9), as in Java; prints whether 0.000001 is below it (False).
+    :return:
+    '''
+    print(0.000001 < 1e-9)
+
+
+t4()

+ 30 - 2
test/test_pandas.py

@@ -138,13 +138,41 @@ def t11():
     df = pd.DataFrame({'a': [1, 2, 3], 'b': [3, 4, "C"]})
     print(df)
     df1 = df
-    df2 = df.copy() #正确
+    df2 = df.copy()  # 正确
     df1.a = [2, 2, 2]  # 直接使用=只传址,df,df1任何更改,两个变量都更改
     df.b = [3, 3, 3]
     print(df1)
 
 
-t11()
+def t12():
+    '''
+    String splitting 1: appid starts as a string such as "app_1"; drop the
+    "app_" prefix and keep only the numeric suffix.
+    '''
+    df = pd.DataFrame({'appid': ["app_1", "app_2", "app_3"], 'cpu': [3, 4, "C"]},
+                      columns=list(["appid", "cpu"]))
+    # tmp = pd.to_numeric(df["appid"].str.split("_", expand=True)[1].values)
+    # df[['col2', 'col3']] = df[['col2', 'col3']].apply(pd.to_numeric)
+    df["appid"] = pd.to_numeric(df["appid"].str.split("_", expand=True)[1].values)
+    print(df)
+
+
+def t13():
+    '''
+    String splitting 2: compare str.split with and without expand=True.
+
+    Without expand each row becomes a list; with expand=True the pieces
+    become separate columns, and column 1 is printed as an array.
+    :return:
+    '''
+    s = pd.DataFrame(['a,b,c', 'c,d,e'])
+    print(s)
+
+    temp_expend_False = s[0].str.split(',')
+    print(temp_expend_False)
+
+    temp_expend_True = s[0].str.split(',', expand=True)
+    print(temp_expend_True)
+    print(temp_expend_True[1].values)
+
+
+t12()
 
 # result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
 

+ 0 - 19
test/time.py

@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-'''
-time和datetime两个包使用实例
-datetime是日期,time是时间。对应于手机日历和钟表两个应用。
-@Auther :liuyuqi.gov@msn.cn
-@Time :2018/7/8 2:52
-@File :time.py
-'''
-
-import datetime
-import time
-
-# time
-print(time.time())
-
-# datetime
-print(datetime.datetime.now())
-print()

+ 15 - 3
utils/save_result.py

@@ -6,8 +6,7 @@
 @File :save_result.py
 '''
 
-import datetime
-
+from datetime import datetime
 import pandas as pd
 
 
@@ -22,5 +21,18 @@ def save_result(df):
     # data = [["ss", "aa"], ["ss", "aa"], ["ss", "aa"], ["ss", "aa"]]
 
     # df = pd.DataFrame(data, columns=head)
-    df.to_csv(("../submit/submit_" + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"), header=None,
+    df.to_csv(("../submit/submit_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"), header=None,
               index=False)
+
+
+def marge_ab(df_a, df_b):
+    '''
+    Merge two DataFrames and export the result as CSV.
+
+    The frames are combined with pd.merge using its defaults and written,
+    without header or index, to "submit_<YYYYmmdd_HHMMSS>.csv" relative to
+    the current working directory.
+    NOTE(review): datetime.now() needs the name `datetime` to be the
+    datetime class, not the datetime module.
+    :param df_a: left DataFrame
+    :param df_b: right DataFrame
+    :return: None
+    '''
+    path_ab = "submit_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
+    df_ab = pd.merge(df_a, df_b)
+
+    df_ab.to_csv(path_ab, header=None, index=False)