-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinearRegression.R
74 lines (58 loc) · 2.85 KB
/
LinearRegression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library("ggplot2")
library("ggthemes")
library("scales")
library("reshape2")
library(tidyverse)
#TODO: Take max of found uncertainty vs uncertainty in fit of original data
# Input: a CSV file whose first four columns are, in this order:
#   x, x uncertainty, y, y uncertainty
# If the file has a header row, set header = TRUE below.
data = read.csv("testvalues4.csv", header = FALSE)

# Fail early with a clear message instead of a cryptic error further down
# (for example when a header row was read as data and made the columns text).
if (ncol(data) < 4) {
  stop("Input file must have at least 4 columns: x, xuncertainty, y, yuncertainty",
       call. = FALSE)
}
if (!all(vapply(data[, 1:4], is.numeric, logical(1)))) {
  stop("The first four columns must be numeric; ",
       "if the file has a header row, set header = TRUE",
       call. = FALSE)
}
if (nrow(data) < 2) {
  stop("At least two data points are needed to fit a line", call. = FALSE)
}
# Draw n simulated copies of a set of measured points.
#
# Each value mean[i] is treated as normally distributed, with uncertainty[i]
# read as the half-width of the range that holds about 95% of the values.
#
# Arguments:
#   n           - number of simulated data sets (a single positive whole number)
#   mean        - numeric vector of measured values, one per point
#   uncertainty - numeric vector of non-negative uncertainties, one per point,
#                 or a single value applied to every point
#
# Returns a two-column tibble in long format:
#   key   - label of the simulated data set, "V1" ... "Vn"
#   value - simulated value; within each data set the rows follow the order
#           of `mean`
normaldistribution = function(n, mean, uncertainty) {
  stopifnot(
    is.numeric(n), length(n) == 1, !is.na(n), n >= 1, n == round(n),
    is.numeric(mean),
    is.numeric(uncertainty),
    length(uncertainty) %in% c(1, length(mean)),
    all(uncertainty >= 0, na.rm = TRUE)
  )
  # Assuming the range contains 95% of the data, sd = range/4,
  # and range = 2*uncertainty.
  sigma = uncertainty / 2
  npoints = length(mean)
  # rnorm() recycles `mean` and `sd`, so the draws come out one data set at a
  # time: every point of set 1, then every point of set 2, and so on.
  values = rnorm(n * npoints, mean = mean, sd = sigma)
  # Label each run of npoints draws with its data set. seq_len() yields
  # integers, so large n is never printed in scientific notation.
  tibble::tibble(
    key = rep(paste0("V", seq_len(n)), each = npoints),
    value = values
  )
}
# Number of simulated data sets. Larger values (10000 or higher seem to work
# well) give steadier estimates but take some time.
# The draws are random: call set.seed() before this point for repeatable output.
numberofpoints = 1000

# Simulate every measured point numberofpoints times, separately for x and y.
xdata = normaldistribution(numberofpoints, data[,1], data[,2])
ydata = normaldistribution(numberofpoints, data[,3], data[,4])

# Both simulations list data sets and points in the same order, so their rows
# pair up one-to-one; check that before combining them.
stopifnot(identical(xdata$key, ydata$key))
mergeddata = data.frame(
  Index = xdata$key,
  xValue = xdata$value,
  yValue = ydata$value,
  stringsAsFactors = FALSE
)

# Fit one least-squares line per simulated data set. lsfit() names its two
# coefficients "Intercept" and "X", which become the rows of coefmatrix.
coefmatrix = vapply(
  split(mergeddata[, c("xValue", "yValue")], mergeddata$Index),
  function(oneset) coef(lsfit(oneset$xValue, oneset$yValue)),
  numeric(2)
)
regressions = data.frame(
  Index = colnames(coefmatrix),
  Intercept = coefmatrix["Intercept", ],
  X = coefmatrix["X", ],
  row.names = NULL,
  stringsAsFactors = FALSE
)
slopevalues = regressions$X
interceptvalues = regressions$Intercept

# Best estimate = mean over all fits; uncertainty = 2 standard deviations.
bestlineslope = mean(slopevalues)
bestlineintercept = mean(interceptvalues)
slopeUncertainty = 2*sd(slopevalues)
interceptUncertainty = 2*sd(interceptvalues)
highslope = bestlineslope + slopeUncertainty
highintercept = bestlineintercept + interceptUncertainty
lowslope = bestlineslope - slopeUncertainty
lowintercept = bestlineintercept - interceptUncertainty

# Measured points with named columns, so the plot mappings refer to columns
# instead of indexing the global data frame.
measured = data.frame(
  x = data[,1],
  xuncertainty = data[,2],
  y = data[,3],
  yuncertainty = data[,4]
)
yerrors = aes(ymax = y + yuncertainty, ymin = y - yuncertainty)
xerrors = aes(xmax = x + xuncertainty, xmin = x - xuncertainty)

plot1 =
  ggplot(measured, aes(x = x, y = y)) +
  geom_point() +
  theme_bw() +
  # Change the errorbar width and height below to match the graph.
  geom_errorbar(yerrors, width = 0.2) +
  geom_errorbarh(xerrors, height = 0.3) +
  geom_abline(intercept = bestlineintercept, slope = bestlineslope) +
  # The dotted lines pair the high intercept with the low slope and
  # the low intercept with the high slope.
  geom_abline(intercept = highintercept, slope = lowslope, linetype = 3) +
  geom_abline(intercept = lowintercept, slope = highslope, linetype = 3)
# Optional layers; append them to plot1 with `+` to show them.
# The generated data in red:
#   geom_point(aes(xValue, yValue), data = mergeddata, inherit.aes = FALSE,
#              color = "red", alpha = 1/150)
# The generated regression lines in grey:
#   geom_abline(intercept = interceptvalues, slope = slopevalues,
#               alpha = 1/50, color = "grey")
print(plot1)

# The trailing newline keeps the console prompt off the result line.
cat("\nSlope: ", bestlineslope, "Slope Uncertainty: ", slopeUncertainty,
    "\nIntercept: ", bestlineintercept, "Intercept Uncertainty: ", interceptUncertainty,
    "\n")