test_pandas.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. @Author :liuyuqi.gov@msn.cn
  5. @Time :2018/7/5 3:08
  6. @File :test_pandas.py
  7. '''
  8. import pandas as pd
  9. def t1():
  10. a = [['a', '1.2', '4.2'], ['b', '70', '0.03'], ['x', '5', '0']]
  11. df = pd.DataFrame(a, columns=list("ABC"))
  12. print(df.dtypes)
  13. print(df)
  14. def t2():
  15. obj = pd.Series(list('cadaabbcc'))
  16. uniques = obj.unique()
  17. print(obj.dtypes)
  18. print(uniques.shape)
  19. def t3():
  20. df = pd.DataFrame()
  21. df2 = pd.read_csv()
  22. df3 = pd.Series()
  23. pd.concat()
  24. pd.to_datetime()
  25. pd.merge()
  26. pd.Timestamp
  27. def t4():
  28. df = pd.DataFrame(columns=list("AB"), data=[[1, 2], [3, 4]])
  29. df["C"] = None
  30. df["C"][1] = 2
  31. print(df)
  32. def t5():
  33. ser1 = pd.Series([1, 2, 3, 4])
  34. ser2 = pd.Series(range(4), index=["a", "b", "c", "d"])
  35. sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
  36. ser3 = pd.Series(sdata)
  37. # print(ser1)
  38. print(ser2)
  39. # 访问Series
  40. ser2["a"]
  41. # 所有索引
  42. ser2.index
  43. # 所有值
  44. ser2.values
  45. def t6():
  46. '''
  47. 切片:
  48. :return:
  49. '''
  50. df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "111", "B": "121"}, {"A": "1111", "B": "1211"}])
  51. print(df)
  52. print(df.columns.size) # 列数 2
  53. h, l = df.shape
  54. print(h, l) # 3,2
  55. print(df.iloc[:, 0].size) # 行数 3
  56. print(df.ix[[0]].index.values[0]) # 索引值 0
  57. print(df.ix[[0]].values[0][0]) # 第一行第一列的值 11
  58. print(df.ix[[1]].values[0][1]) # 第二行第二列的值 121
  59. print(df.A, df.B)
  60. print(df["A"], df["B"])
  61. print(df.loc["A"])
  62. print(df.loc[df["A"] > 1])
  63. print(df.loc[pd.isna(df["A"])] == False)
  64. print(df[df.isna["A"]] == False) # .loc可以省略
  65. # iloc和loc:iloc按0,1,2,3等索引每行;loc按每列的列名索引
  66. def t7():
  67. '''
  68. 增加一行/一列
  69. :return:
  70. '''
  71. df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
  72. # df.insert(value=list([22, 33]))
  73. df = df.append(pd.DataFrame([{"A": "1133", "B": "1332"}]))
  74. print(df)
  75. # 增加一列:
  76. df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
  77. df["is"] = False
  78. print(df)
  79. def t8():
  80. # 修改值不能直接引用:df3["mem"][i],而需要df3.loc["mem"][i]
  81. df = pd.DataFrame([{"A": "11", "B": "12"}, {"A": "1111", "B": "1211"}])
  82. df["is"] = False
  83. # df["is"][0] = True
  84. # df.loc[0][2] = True
  85. # df.loc[:, "is"] = True
  86. df.loc[0, "is"] = True
  87. print(df)
  88. # DataFrame循环遍历
  89. def t9():
  90. df = pd.DataFrame({'a': [1, 2, 3], 'b': [3, 4, "C"]})
  91. for row in df.itertuples():
  92. print(row.a, row.b)
  93. for row in df.items():
  94. print(row[1][0], row[1][1], row[1][2])
  95. # 不推荐
  96. for row in df.iteritems():
  97. print(row[1][0], row[1][1], row[1][2])
  98. # 不推荐
  99. for row in df:
  100. print(df[row][0], df[row][1], df[row][2])
  101. def t10():
  102. for i in range(10):
  103. print(i)
  104. def t11():
  105. '''
  106. :return:
  107. '''
  108. df = pd.DataFrame({'a': [1, 2, 3], 'b': [3, 4, "C"]})
  109. print(df)
  110. df1 = df
  111. df2 = df.copy() #正确
  112. df1.a = [2, 2, 2] # 直接使用=只传址,df,df1任何更改,两个变量都更改
  113. df.b = [3, 3, 3]
  114. print(df1)
# Run the t11 demo at module load; this executes on import as well as when
# the file is run as a script.
t11()
  116. # result = pd.DataFrame(columns=list(["instanceid", "machineid"]), data=list())
  117. # df = pd.DataFrame({'a': list(range(100)), 'b': [random.random() for i in range(100)]})
  118. # index = pd.MultiIndex.from_product([list('abcd'), list(range(25))])
  119. # df.index = index
  120. # print(df.head())
  121. # df.loc[('a', -1), :] = None
  122. # df.tail()
  123. #
  124. # data = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
  125. # data.index = pd.MultiIndex.from_tuples([('a', 1), ('b', 1), ('c', 1)])
  126. # data
  127. # new_df = df.append(data)
  128. # new_df.tail()