[Java]数据分析--回归分析
线性回归
需求:从文件读取数据对,计算回归函数及系数
实现1:使用commons.math的SimpleRegression,定义函数getData从文件读取数据并返回SimpleRegression对象
1import java.io.File;
2import java.io.FileNotFoundException;
3import java.util.Scanner;
4import org.apache.commons.math3.stat.regression.SimpleRegression;
5
6public class Example1 {
7public static void main(String[] args) {
8 SimpleRegression sr = getData("data/Data1.dat");
9double m = sr.getSlope();
10double b = sr.getIntercept();
11double r = sr.getR(); // correlation coefficient
12double r2 = sr.getRSquare();
13double s = sr.getSumSquaredErrors();
14double tss = sr.getTotalSumSquares();
15
16 System.out.printf("y = %.6fx + %.4f%n", m, b);
17 System.out.printf("r = %.6f%n", r);
18 System.out.printf("r2 = %.6f%n", r2);
19 System.out.printf("EV = %.5f%n", tss - s);
20 System.out.printf("UV = %.4f%n", s);
21 System.out.printf("TV = %.3f%n", tss);
22 }
23
24public static SimpleRegression getData(String data) {
25 SimpleRegression sr = new SimpleRegression();
26try {
27 Scanner fileScanner = new Scanner(new File(data));
28 Line(); // read past title line
29int n = Int();
30 Line(); // read past line of labels
31 Line(); // read past line of labels
32for (int i = 0; i < n; i++) {
33 String line = Line();
34 Scanner lineScanner = new Scanner(line).uDelimiter("\\t");
35double x = Double();
36double y = Double();
37 sr.addData(x, y);
38 }
39 } catch (FileNotFoundException e) {
40 println(e);
41 }
42return sr;
43 }
44 }
View Code
实现2:直接计算统计量
1import java.io.File;
2import java.io.FileNotFoundException;
3import java.util.Scanner;
4
/**
 * Fits a least-squares line to (x, y) pairs read from a data file by
 * accumulating the sums of x, x*x, y, y*y and x*y directly, without any
 * external library.
 */
public class Example2 {
    /** Data file used when no path is given on the command line. */
    private static final String DEFAULT_DATA_FILE = "data/Data1.dat";

    private static double sX = 0, sXX = 0, sY = 0, sYY = 0, sXY = 0;
    private static int n = 0;

    /**
     * Reads the data file and prints the regression line, the correlation
     * coefficient, r-squared, and the explained (EV), unexplained (UV) and
     * total (TV) variation.
     *
     * @param args optional; {@code args[0]} overrides the default data file path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;
        getData(path);
        if (n < 2) {
            // With fewer than two points every formula below is 0/0.
            System.err.println("Need at least two data points to fit a line; got " + n);
            return;
        }

        double m = (n*sXY - sX*sY)/(n*sXX - sX*sX);
        double b = sY/n - m*sX/n;
        double r2 = m*m*(n*sXX - sX*sX)/(n*sYY - sY*sY);
        // sqrt alone is always non-negative; r must carry the sign of the slope.
        double r = Math.copySign(Math.sqrt(r2), m);
        double tv = sYY - sY*sY/n;
        double mX = sX/n;  // mean value of x
        double ev = (sXX - 2*mX*sX + n*mX*mX)*m*m;
        double uv = tv - ev;

        System.out.printf("y = %.6fx + %.4f%n", m, b);
        System.out.printf("r = %.6f%n", r);
        System.out.printf("r2 = %.6f%n", r2);
        System.out.printf("EV = %.5f%n", ev);
        System.out.printf("UV = %.4f%n", uv);
        System.out.printf("TV = %.3f%n", tv);
    }

    /**
     * Reads data pairs from a file and accumulates their sums into the
     * static fields of this class.
     *
     * <p>Expected layout: a title line, a line holding the number of pairs,
     * a line of column labels, then one tab-separated {@code x y} pair per line.
     * The accumulators are reset first, so repeated calls do not mix data sets.
     * If the file cannot be found, the exception is printed to standard error
     * and the point count stays at zero.
     *
     * @param data path of the data file
     */
    public static void getData(String data) {
        sX = sXX = sY = sYY = sXY = 0;
        n = 0;
        // try-with-resources closes the file handle even if parsing fails.
        try (Scanner fileScanner = new Scanner(new File(data))) {
            fileScanner.nextLine();  // read past title line
            n = fileScanner.nextInt();
            fileScanner.nextLine();  // consume the remainder of the count line
            fileScanner.nextLine();  // read past line of labels
            for (int i = 0; i < n; i++) {
                String line = fileScanner.nextLine();
                try (Scanner lineScanner = new Scanner(line).useDelimiter("\\t")) {
                    double x = lineScanner.nextDouble();
                    double y = lineScanner.nextDouble();
                    sX += x;
                    sXX += x*x;
                    sY += y;
                    sYY += y*y;
                    sXY += x*y;
                }
            }
        } catch (FileNotFoundException e) {
            System.err.println(e);
        }
    }
}
View Code
y = 0.882279x + 18.8739
r = 0.935222
r2 = 0.874641
EV = 1423.35676
UV = 204.0042
TV = 1627.361
实现3:对辅助类进行实例化,并绘图
Example3.java
1import java.io.File;
2import javax.swing.JFrame;
3
4public class Example3 {
5public static void main(String[] args) {
6 Data data = new Data(new File("data/Data1.dat"));
7 JFrame frame = new Title());
8 frame.tDefaultCloOperation(JFrame.EXIT_ON_CLOSE);
9 RegressionPanel panel = new RegressionPanel(data);
10 frame.add(panel);
11 frame.pack();
12 frame.tSize(500, 422);
13 frame.tResizable(fal);
14 frame.tLocationRelativeTo(null); // center frame on screen
15 frame.tVisible(true);
16 }
17 }
View Code
Data.java
1import java.io.File;
2import java.io.FileNotFoundException;
3import java.util.Scanner;
4
/**
 * A data set of (x, y) pairs read from a file, together with the summary
 * statistics and least-squares regression line computed from it.
 *
 * <p>Expected file layout: a title line, the number of pairs, the x label
 * and the y label (whitespace separated), then the pairs themselves as
 * whitespace-separated numbers.
 */
public class Data {
    private String title, xName, yName;
    private int n;
    // Start as empty arrays so the accessors never return null when loading fails.
    private double[] x = new double[0], y = new double[0];
    private double sX, sXX, sY, sYY, sXY, minX, minY, maxX, maxY;
    private double meanX, meanY, slope, intercept, corrCoef;

    /**
     * Reads the data set from the given file and computes its statistics.
     * If the file cannot be found, the exception is printed to standard error
     * and the object is left with zero points.
     *
     * @param inputFile the data file to read
     */
    public Data(File inputFile) {
        // try-with-resources closes the file handle even if parsing fails.
        try (Scanner input = new Scanner(inputFile)) {
            title = input.nextLine();
            n = input.nextInt();
            xName = input.next();
            yName = input.next();
            input.nextLine();  // consume the remainder of the labels line
            x = new double[n];
            y = new double[n];
            minX = minY = Double.POSITIVE_INFINITY;
            maxX = maxY = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < n; i++) {
                double xi = x[i] = input.nextDouble();
                double yi = y[i] = input.nextDouble();
                sX += xi;
                sXX += xi*xi;
                sY += yi;
                sYY += yi*yi;
                sXY += xi*yi;
                minX = Math.min(minX, xi);
                minY = Math.min(minY, yi);
                maxX = Math.max(maxX, xi);
                maxY = Math.max(maxY, yi);
            }
            meanX = sX/n;
            meanY = sY/n;
            slope = (n*sXY - sX*sY)/(n*sXX - sX*sX);
            intercept = meanY - slope*meanX;
            // Multiplying by the slope keeps the sign of the correlation.
            corrCoef = slope*Math.sqrt((n*sXX - sX*sX)/(n*sYY - sY*sY));
        } catch (FileNotFoundException e) {
            System.err.println(e);
        }
    }

    /** @return the title line of the data file */
    public String getTitle() {
        return title;
    }

    /** @return the label of the x variable */
    public String getXName() {
        return xName;
    }

    /** @return the label of the y variable */
    public String getYName() {
        return yName;
    }

    /** @return the number of data pairs */
    public int getN() {
        return n;
    }

    /** @return a copy of the x values, so callers cannot alter this data set */
    public double[] getX() {
        return x.clone();
    }

    /** @return a copy of the y values, so callers cannot alter this data set */
    public double[] getY() {
        return y.clone();
    }

    /** @return the mean of the x values */
    public double getMeanX() {
        return meanX;
    }

    /** @return the mean of the y values */
    public double getMeanY() {
        return meanY;
    }

    /** @return the slope of the least-squares regression line */
    public double getSlope() {
        return slope;
    }

    /** @return the y-intercept of the least-squares regression line */
    public double getIntercept() {
        return intercept;
    }

    /** @return the correlation coefficient of x and y */
    public double getCorrCoef() {
        return corrCoef;
    }

    /** @return a new n-by-2 table whose rows are the (x, y) pairs */
    public double[][] getTable() {
        double[][] table = new double[n][2];
        for (int i = 0; i < n; i++) {
            table[i][0] = x[i];
            table[i][1] = y[i];
        }
        return table;
    }

    /** @return the smallest x value */
    public double getMinX() {
        return minX;
    }

    /** @return the smallest y value */
    public double getMinY() {
        return minY;
    }

    /** @return the largest x value */
    public double getMaxX() {
        return maxX;
    }

    /** @return the largest y value */
    public double getMaxY() {
        return maxY;
    }
}
View Code
RegressionPanel.java
import java.awt.BasicStroke;
import java.awt.Color;
import java.awt.Graphics;
import java.awt.Graphics2D;
import javax.swing.JPanel;
public class RegressionPanel extends JPanel {
private static final int WIDTH=500, HEIGHT=400, BUFFER=28, MARGIN=40; private final Data data;
private double xMin, xMax, yMin, yMax, xRange, yRange, gWidth, gHeight; private double slope, intercept;
public RegressionPanel(Data data) {
this.data = data;
this.tSize(WIDTH, HEIGHT);
this.xMin = MinX();
this.xMax = MaxX();
this.yMin = MinY();
this.yMax = MaxY();
this.slope = Slope();
this.intercept = Intercept();
this.xRange = xMax - xMin;
this.yRange = yMax - yMin;
this.gWidth = WIDTH - 2*MARGIN - BUFFER;
this.gHeight = HEIGHT - 2*MARGIN - BUFFER;
tBackground(Color.WHITE);
}
@Override
public void paintComponent(Graphics g) {
super.paintComponent(g);
Graphics2D g2 = (Graphics2D)g;
g2.tStroke(new BasicStroke(1));
什么食物补铁
drawGrid(g2);
drawPoints(g2, X(), Y());
drawLine(g2);
}
private void drawGrid(Graphics2D g2) {
g2.tStroke(new BasicStroke(1));
double xGd = Math.pow(10, Math.floor(Math.log10(xRange)));
int xd = dToI(xGd);
int x0 = dToI(xGd*Math.floor(xMin/xGd));
int xn = dToI(il(xMax/xGd));
for (int xi = x0; xi <= xn; xi += xd) {
g2.tColor(Color.LIGHT_GRAY);
生活养生小常识int p = f(xi);
g2.drawLine(p, 0, p, HEIGHT-18); // vertical lines
g2.tColor(Color.BLACK);
g2.drawString(""+xi, p-8, HEIGHT-4);
}
double yGd = Math.pow(10, Math.floor(Math.log10(yRange)));
int yd = dToI(yGd);
int y0 = dToI(xGd*Math.floor(xMin/yGd));
int yn = dToI(il(yMax/yGd));
for (int yi = y0; yi <= yn; yi += yd) {
王娡结局g2.tColor(Color.LIGHT_GRAY);
int q = g(yi);
g2.drawLine(BUFFER, q, WIDTH, q); // horizontal lines
g2.tColor(Color.LIGHT_GRAY);
g2.tColor(Color.BLACK);
g2.drawString((yi<100?" ":"")+yi, 2, q+5);
}
}
private void drawPoints(Graphics2D g2, double[] x, double[] y) { g2.tColor(Color.BLACK);
for (int i = 0; i < x.length; i++) {
int u = f(x[i]);
int v = g(y[i]);
g2.fillOval(u-3, v-3, 6, 6); // coordinates are at NW corners }
}
private void drawLine(Graphics2D g2) {
g2.tColor(Color.BLUE);
g2.tStroke(new BasicStroke(2));
int p0 = BUFFER;
int q0 = g(yLine(fInv(p0)));
int p1 = WIDTH;
int q1 = g(yLine(fInv(p1)));
g2.drawLine(p0, q0, p1, q1);
}
private double yLine(double x) {
return slope*x + intercept;
}
private int dToI(double x) {
return (und(x);
}
private int f(double x) {
return dToI((x - xMin)*gWidth/xRange) + BUFFER + MARGIN; }
private int g(double y) {
return dToI(gHeight - (y - yMin)*gHeight/yRange) + MARGIN; }
private double fInv(int p) {
return (p - BUFFER - MARGIN)*xRange/gWidth + xMin;
}
private double gInv(int q) {
return yMin + (gHeight + MARGIN - q)*yRange/gHeight;
}
}
View Code
多项式回归
需求:已知刹车速度和距离的数据,求解
实现:最小二乘法,解方程组,LU分解
1import org.apache.commons.math3.linear.*;
2