# Linear regression and k means

1. Implement polynomial fitting with linear regression using the linearregression.train.csv file, and predict on linear-regression.test.csv.
2. Implement the K-means clustering algorithm using clustering.csv, and find the best k for
this dataset.

Solution

Cluster.py

# coding: utf-8

# In[23]:

#Importing pandas library to read csv files and numpy library to use min function.

import pandas as pd
import numpy as np

# In[25]:

# The k-means algorithm is implemented in the scikit-learn library.
from sklearn.cluster import KMeans

# matplotlib is required for plotting.
import matplotlib.pyplot as plt

# cdist computes the pairwise distances between two collections of points.
from scipy.spatial.distance import cdist

# In[8]:

# In[30]:

# Load the points to cluster.
# NOTE(review): the cell that defined `A` is missing from this export. The
# file name is taken from the assignment text and the default header handling
# (first row treated as column names) is an assumption -- confirm both
# against the real data file.
A = pd.read_csv('clustering.csv')

# For every candidate k, record the mean Euclidean distance from each sample
# to its nearest cluster centre (the "distortion" used by the elbow method).
distortions = []
K = range(1, 10)
for k in K:
    # fit() returns the fitted estimator, so a single call is enough.
    kmeanModel = KMeans(n_clusters=k).fit(A)
    # Distance of every sample to its closest centre.
    nearest = np.min(cdist(A, kmeanModel.cluster_centers_, 'euclidean'), axis=1)
    distortions.append(nearest.sum() / A.shape[0])

# In[31]:

# Plot the elbow curve (the k-means elbow method is used to find the best k):
# distortion on the y axis against the number of clusters on the x axis.
plt.plot(K, distortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Distortion')
plt.title('The Elbow Method showing the optimal k')
plt.show()

# In[ ]:

# From the elbow plot above, the best value of k is 4.

Linear.py

# coding: utf-8

# In[75]:

#Importing pandas library to read csv files and for other various purposes.

import pandas as pd

# In[76]:

# Libraries required to implement the linear regression model and the plots.
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# In[77]:

# Load the train and test datasets.
# NOTE(review): the cell that read the CSV files is missing from this export.
# The file names below are inferred from the assignment text, and header=None
# is an assumption based on the column names being assigned right after
# loading -- confirm both against the real data files.
train = pd.read_csv('linear-regression.train.csv', header=None)
test = pd.read_csv('linear-regression.test.csv', header=None)

# In[78]:

# (Preview cell: the code that displayed the first rows of train is not part
# of this export.)

# In[79]:

# Assign column names to the train and test data: first column is the
# target Y, second column is the feature X.
Col_name = ['Y', 'X']
train.columns = Col_name
test.columns = Col_name

# In[80]:

#View starting few values of train with column name

# In[81]:

# Separate the feature column (X) from the target column (Y) in both datasets.
# Plain column selection replaces the `[:,]` 1-tuple slice, which only
# returned the whole Series anyway.
train_X = train['X']
train_Y = train['Y']
test_X = test['X']
test_Y = test['Y']

# In[82]:

# Build the linear regression estimator.
regr = linear_model.LinearRegression()

# In[83]:

# Fit on the training set. to_frame() turns each Series into a one-column
# DataFrame, which gives the estimator two-dimensional inputs.
train_features = train_X.to_frame()
train_target = train_Y.to_frame()
regr.fit(train_features, train_target)

# In[84]:

# Predict the target for every row of the test set.
test_features = test_X.to_frame()
y_pred = regr.predict(test_features)

# In[85]:

# The fitted coefficients.
print('Coefficients: \n', regr.coef_)

# The mean squared error of the predictions on the test set.
print("Mean squared error: %.2f" % mean_squared_error(test_Y, y_pred))

# r2_score is the coefficient of determination (R^2): 1 is perfect prediction.
print('Variance score: %.2f' % r2_score(test_Y, y_pred))

# In[90]:

# Plot outputs: scatter plot of the test dataset (black) with the predicted
# values drawn as a line (blue).
plt.scatter(test_X, test_Y, color='black')
plt.plot(test_X, y_pred, color='blue', linewidth=3)

# Empty tuples remove the tick marks from both axes.
plt.xticks(())
plt.yticks(())

plt.show()

# In[ ]:

# In[ ]: