# 多项式回归补全
#
# 主要代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 先查看线性回归的效果
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
def PolynomialRegression(degree):
    """Build an unfitted pipeline that performs polynomial regression.

    The pipeline chains three named steps, in this order:

    1. ``"poly"`` - polynomial feature expansion of the given degree,
    2. ``"std_scaler"`` - standardisation of the expanded features,
    3. ``"lin_reg"`` - a linear regression on the scaled features.

    Args:
        degree: Degree forwarded to ``PolynomialFeatures``.

    Returns:
        A ``Pipeline`` instance containing the three steps above.
    """
    steps = [
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        ("lin_reg", LinearRegression()),
    ]
    return Pipeline(steps)

# Degree of the polynomial fitted to every column.
# NOTE(review): 40 is carried over unchanged from the original script; a degree
# this high is prone to overfitting and numerical ill-conditioning - confirm
# that it is really intended.
POLY_DEGREE = 40


def _copy_known_rows(template, source):
    """Copy columns 1-5 of *source* into the matching rows of *template*.

    Both frames are walked in order with a single cursor into *source*: a
    template row is filled only when its first column equals the first column
    of the current source row, after which the cursor advances. Template rows
    that do not match are left untouched. *template* is modified in place.
    """
    cursor = 0
    source_len = len(source)
    for row in range(len(template)):
        # Every source row has been consumed: nothing left to copy. Without
        # this guard the lookup below indexes past the end of *source* and
        # raises IndexError.
        if cursor >= source_len:
            break
        if template.iloc[row, 0] == source.iloc[cursor, 0]:
            for col in range(1, 6):
                template.iloc[row, col] = source.iloc[cursor, col]
            cursor += 1


def _fill_missing_by_regression(frame, col, degree):
    """Replace the ``-1`` placeholders in column *col* with fitted values.

    A polynomial regression of the given *degree* is fitted on the rows whose
    value is not ``-1`` (row position as the single feature, cell value as the
    target). Each ``-1`` cell is then overwritten with the prediction for its
    row position, rounded to two decimals. *frame* is modified in place.

    Raises:
        ValueError: If the column has missing cells but no known value to
            fit the model on.
    """
    column = frame.iloc[:, col]
    known = (column != -1).to_numpy(dtype=bool)
    missing_rows = np.flatnonzero(~known)
    if missing_rows.size == 0:
        # Nothing to fill, so fitting a model would be wasted work.
        return
    known_rows = np.flatnonzero(known)
    if known_rows.size == 0:
        raise ValueError(
            "column %d has no known values to fit the regression on" % col
        )

    X = known_rows.reshape(-1, 1)
    y = column.to_numpy()[known].astype(float).reshape(-1, 1)
    model = PolynomialRegression(degree=degree)
    model.fit(X, y)

    # One batched prediction for all missing rows instead of one call per row.
    predicted = model.predict(missing_rows.reshape(-1, 1))[:, 0]
    for row, value in zip(missing_rows, predicted):
        frame.iloc[row, col] = round(value, 2)


for num in range(1, 11):
    # Source workbook that holds the values to be merged into the template.
    data_sorce = pd.read_excel(r'F:\数据集\中青杯\原股票\原股票%d.xlsx' % num)
    # Take an explicit copy: assigning a column on a bare slice of data_sorce
    # triggers pandas' SettingWithCopy behaviour.
    data = data_sorce.iloc[3:, :].copy()
    # Convert the '股票代码' column to datetimes.
    # NOTE(review): presumably this is the first column, which is the one
    # compared against the template dates below - confirm.
    data['股票代码'] = pd.to_datetime(data['股票代码'])
    # Template workbook; per the original author's note, gaps are marked -1.
    data_g = pd.read_excel(r'F:\数据集\中青杯\数据%d.xlsx' % num)
    data_g.iloc[:, 0] = pd.to_datetime(data_g.iloc[:, 0])
    data_n = data.copy()

    # Step 1: copy the known values into the template.
    _copy_known_rows(data_g, data_n)

    # Step 2: fill what is still missing, column by column, on a copy so
    # data_g keeps the merged-but-unfilled state.
    data_c = data_g.copy()
    for que in range(1, 6):
        _fill_missing_by_regression(data_c, que, POLY_DEGREE)

    # Save the completed table.
    data_c.to_excel(r"F:\数据集\中青杯\数据结果存放\数据%d补全结果.xlsx" % num)