# regress.py - gets x,y data from file named
#   by user, and plots points and best-fit line
# updated by cmc, 1/29/2015


def parseData(lines):
    """Convert an iterable of text lines into a list of (x, y) float tuples.

    Each non-blank line must start with two whitespace-separated numbers;
    anything after the second number on a line is ignored.  Blank lines
    (for example a trailing empty line at the end of the file) are skipped.

    Raises:
        ValueError: if a non-blank line has fewer than two fields or a
            field that cannot be converted to float.  The message names
            the offending line number.
    """
    data = []
    for lineNumber, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # blank line: nothing to parse
            continue
        try:
            x = float(fields[0])
            y = float(fields[1])
        except (IndexError, ValueError) as err:
            raise ValueError(
                "line {}: expected two numbers, got {!r}".format(
                    lineNumber, line.rstrip("\n")
                )
            ) from err
        data.append((x, y))
    return data


def getData():
    """Ask the user for a file name and read x,y data from that file.

    Returns:
        A list of (x, y) tuples of floats, one per non-blank line.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a line cannot be parsed (see parseData).
    """
    filename = input("enter input file name --> ")
    # 'with' guarantees the file is closed even if parsing fails
    with open(filename, "r") as dataFile:
        return parseData(dataFile)


def fitLine(data):
    """Compute the least-squares best-fit line through the data points.

    Args:
        data: non-empty list of (x, y) tuples.

    Returns:
        A (slope, meanX, meanY) tuple; the fitted line is
        y = meanY + slope * (x - meanX).

    Raises:
        ValueError: if data is empty, or if every x value is the same
            (the best-fit line would be vertical, so it has no slope).
    """
    n = len(data)
    if n == 0:
        raise ValueError("cannot fit a line: no data points")

    xValues = [x for x, _ in data]
    # compare the extremes rather than testing the denominator for zero:
    # rounding in the mean can leave a tiny non-zero denominator
    if min(xValues) == max(xValues):
        raise ValueError("cannot fit a line: all x values are identical")

    meanX = sum(xValues) / n
    meanY = sum(y for _, y in data) / n

    # centred sums are algebraically equal to
    #   (sumXY - n*meanX*meanY) / (sumX2 - n*meanX**2)
    # but avoid subtracting two large, nearly equal numbers
    numerator = sum((x - meanX) * (y - meanY) for x, y in data)
    denominator = sum((x - meanX) ** 2 for x in xValues)
    return numerator / denominator, meanX, meanY


def plotRegression(data):
    """Plot the data points and their best-fit line in a turtle window.

    Miller/Ranum Programming Exercise 4.2.  The window is scaled so the
    data's extremes sit on its edges, each point is drawn as a dot, and
    the best-fit line is drawn in red from the smallest to the largest x.
    The call returns after the user clicks on the window.

    Args:
        data: non-empty list of (x, y) tuples.

    Raises:
        ValueError: if data is empty or all x values are identical
            (see fitLine).
    """
    slope, meanX, meanY = fitLine(data)

    minX = min(x for x, _ in data)
    maxX = max(x for x, _ in data)
    minY = min(y for _, y in data)
    maxY = max(y for _, y in data)
    if minY == maxY:
        # all points share one y value: pad the range so the world
        # rectangle does not have zero height when turtle scales it
        minY, maxY = minY - 1.0, maxY + 1.0

    # imported here so the module can be imported (and the numeric
    # helpers used) on systems without a display
    import turtle

    # create a turtle and scale the window
    pen = turtle.Turtle()
    pen.hideturtle()
    pen.speed(10)
    screen = pen.getscreen()
    screen.setworldcoordinates(minX, minY, maxX, maxY)

    # plot the points
    for x, y in data:
        pen.up()
        pen.goto(x, y)
        pen.down()
        pen.dot()

    # plot the best-fit line
    pen.color("red")
    pen.up()
    pen.goto(minX, meanY + slope * (minX - meanX))
    pen.down()
    pen.goto(maxX, meanY + slope * (maxX - meanX))

    screen.exitonclick()


# main execution - just one statement necessary;
# guarded so importing this module does not prompt for input
if __name__ == "__main__":
    plotRegression(getData())