Linear Regression and K-Means
1. Implement polynomial fitting with linear regression using the linear-regression.train.csv file, and predict on linear-regression.test.csv.
2. Implement the k-means clustering algorithm using clustering.csv, and find the best k for
this dataset.
Add plots to your Python code to visualize your predictions.
Solution
Cluster.py
# coding: utf-8

# Elbow-method search for the best number of clusters (k) for k-means on
# clustering.csv: fit k-means for k = 1..9, record the mean distance from each
# sample to its nearest cluster centre, and plot that distortion against k.

# pandas reads the CSV file; numpy provides the row-wise minimum used below.
import pandas as pd
import numpy as np

# The k-means algorithm is implemented in the scikit-learn library.
from sklearn.cluster import KMeans
# matplotlib is required to draw the elbow plot.
import matplotlib.pyplot as plt
# cdist computes the distances between every sample and every cluster centre.
from scipy.spatial.distance import cdist

# Raw string so the backslashes of the Windows path stay literal
# (a plain "C:\Users..." literal is a malformed \U escape in Python 3).
A = pd.read_csv(r"C:\Users\Shashank\Downloads\Assignment\clustering.csv")

# One distortion value per candidate k.
distortions = []
K = range(1, 10)
for k in K:
    # fit() returns the fitted estimator, so a single call is enough.
    kmeanModel = KMeans(n_clusters=k).fit(A)
    # Distance from every sample to its closest centre, averaged over samples.
    nearest = np.min(cdist(A, kmeanModel.cluster_centers_, 'euclidean'), axis=1)
    distortions.append(nearest.sum() / A.shape[0])

# Plot the elbow (the k-means elbow method is used here to find the best k).
plt.plot(K, distortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Distortion')
plt.title('The Elbow Method showing the optimal k')
plt.show()

# From the above graph we get the best k value as 4.
Linear.py
# coding: utf-8

# Fit a linear regression on the training CSV, evaluate it on the test CSV
# (coefficients, mean squared error, R^2) and plot the predictions against the
# test data.
# NOTE(review): the assignment text asks for polynomial fitting; this script
# fits a straight line (degree-1 model) only.

# pandas reads the CSV files and holds the data as DataFrames.
import pandas as pd

# Libraries required to implement the linear regression model and the plots.
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the train and test datasets; header=None because the files are read
# without a header row. Raw strings keep the Windows backslashes literal.
# NOTE(review): the file names "l_rain.csv" / "l_est.csv" look truncated
# (possibly "l_train.csv" / "l_test.csv") - confirm against the real files.
train = pd.read_csv(r"C:\Users\Shashank\Downloads\Assignment\l_rain.csv", header=None)
test = pd.read_csv(r"C:\Users\Shashank\Downloads\Assignment\l_est.csv", header=None)

# View the first few rows of train (print so it also shows outside a notebook).
print(train.head())

# Assign column names to the train and test data: first column is the target
# Y, second column is the feature X.
Col_name = ['Y', 'X']
train.columns = Col_name
test.columns = Col_name

# View the first few rows of train again, now with column names.
print(train.head())

# Separate the feature and target columns. Plain column selection is used: the
# original trailing `[:,]` slice added nothing (and tuple keys are not accepted
# by Series indexing in recent pandas).
train_X = train['X']
train_Y = train['Y']
test_X = test['X']
test_Y = test['Y']

# Create the linear regression object.
regr = linear_model.LinearRegression()

# Train the model using the training set; to_frame() turns each Series into
# the single-column 2-D input that fit() is given here.
regr.fit(train_X.to_frame(), train_Y.to_frame())

# Make predictions using the testing set.
y_pred = regr.predict(test_X.to_frame())

# The coefficients.
print('Coefficients: \n', regr.coef_)
# The mean squared error.
print("Mean squared error: %.2f" % mean_squared_error(test_Y, y_pred))
# Explained variance score: 1 is perfect prediction.
print('Variance score: %.2f' % r2_score(test_Y, y_pred))

# Plot outputs: scatter plot of the test dataset (black) and the predicted
# values as a line (blue). Axis ticks are hidden.
plt.scatter(test_X, test_Y, color='black')
plt.plot(test_X, y_pred, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()