import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
# Small sample data set: x1 holds values near zero while x2 holds values in
# the thousands, so the two columns live on very different scales.
raw_data = {
    "x1": [0.15, -0.1, 0, -0.25, 0.05],
    "x2": [3500, 3400, 4000, 3900, 3200],
    "y": [8, 5, 3, 1, 7],
}
df = pd.DataFrame(data=raw_data)
# Bare expression: shows the frame when run as a notebook cell.
df
# Scatter plot of the raw (unscaled) x1 / x2 columns.
plt.figure(figsize=(10, 7))
plt.scatter(df["x1"], df["x2"], color="blue", label="Without scaling")
plt.title("Scatter", fontsize=20)
plt.xlabel("x1", fontsize=20)
plt.ylabel("x2", fontsize=20)
plt.grid(True, alpha=0.6)
# The legend is drawn after the scatter so it picks up the series label.
plt.legend()
plt.show()
def stddev(c):
    """Return the population standard deviation of the values in *c*.

    The squared deviations from the mean are averaged over ``len(c)``
    (population form), not over ``len(c) - 1`` (sample form).

    Args:
        c: A sized iterable of numbers, e.g. a list or a pandas Series.

    Returns:
        The standard deviation as a float.

    Raises:
        ZeroDivisionError: If *c* is empty.
    """
    count = len(c)
    # Use the exact sum: rounding it to two decimals before dividing distorts
    # the mean (and therefore the result) for small-magnitude data.
    mean = sum(c) / count
    # Iterate over the values rather than indexing with c[i]; on a pandas
    # Series c[i] is a label lookup and fails for a non-default index.
    variance = sum((value - mean) ** 2 for value in c) / count
    return math.sqrt(variance)
def scalingByNormalization(c):
    """Scale *c* to z-scores: ``(value - mean) / standard deviation``.

    The standard deviation comes from this file's ``stddev`` helper.

    Args:
        c: A sized iterable of numbers, e.g. a list or a pandas Series.

    Returns:
        A new list with one scaled value per element of *c*, in order.

    Raises:
        ZeroDivisionError: If *c* is empty. A zero standard deviation (all
            values equal) also results in a division by zero.
    """
    # Use the exact sum: rounding it to two decimals before dividing shifts
    # the mean for small-magnitude data.
    mean = sum(c) / len(c)
    sdev = stddev(c)
    # Iterate over the values rather than indexing with c[i]; on a pandas
    # Series c[i] is a label lookup and fails for a non-default index.
    return [(value - mean) / sdev for value in c]
# Z-score scaled copies of the two feature columns.
nx1, nx2 = (scalingByNormalization(df[column]) for column in ("x1", "x2"))
def scalingByRange(c):
    """Mean-normalize *c*: ``(value - mean) / (max - min)``.

    Args:
        c: A sized iterable of numbers, e.g. a list or a pandas Series.

    Returns:
        A new list with one scaled value per element of *c*, in order.

    Raises:
        ZeroDivisionError: If *c* is empty. When all values are equal the
            range is zero and the division by it fails as well.
    """
    # Use the exact sum: rounding it to two decimals before dividing shifts
    # the mean for small-magnitude data.
    mean = sum(c) / len(c)
    spread = max(c) - min(c)
    # Iterate over the values rather than indexing with c[i]; on a pandas
    # Series c[i] is a label lookup and fails for a non-default index.
    return [(value - mean) / spread for value in c]
# Mean-normalized (range-scaled) copies of the two feature columns.
mx1, mx2 = (scalingByRange(df[column]) for column in ("x1", "x2"))
# Overlay both scaled versions of (x1, x2) on one figure for comparison.
plt.figure(figsize=(10, 7))
plt.scatter(nx1, nx2, color="orange", label="Feature scaling by Normalization")
plt.scatter(mx1, mx2, color="green", label="Feature scaling by range")
plt.title("Scatter", fontsize=20)
plt.xlabel("x1", fontsize=20)
plt.ylabel("x2", fontsize=20)
plt.grid(True, alpha=0.6)
# The legend is drawn after both scatters so it lists both series labels.
plt.legend()
plt.show()
def scalingBetweenRange(c, r1, r2):
    """Linearly rescale *c* so its minimum maps to *r1* and its maximum to *r2*.

    Each value becomes ``(r2 - r1) * (value - min) / (max - min) + r1``.

    Args:
        c: A sized iterable of numbers, e.g. a list or a pandas Series.
        r1: Target value for the smallest element of *c*.
        r2: Target value for the largest element of *c*.

    Returns:
        A new list with one rescaled value per element of *c*, in order.
        An empty *c* yields an empty list.

    Note:
        When all values are equal, ``max - min`` is zero and the division
        by it fails.
    """
    if len(c) == 0:
        return []
    # min/max are loop invariants: compute them once instead of rescanning
    # the whole input for every element.
    lowest = min(c)
    span = max(c) - lowest
    # Iterate over the values rather than indexing with c[i]; on a pandas
    # Series c[i] is a label lookup and fails for a non-default index.
    return [((r2 - r1) * ((value - lowest) / span)) + r1 for value in c]
# Rescale the y column so its values span -10 .. 20.
ny = scalingBetweenRange(df["y"], -10, 20)
# Gather every scaled column side by side in one frame.
df2 = pd.DataFrame(
    {"x1N": nx1, "x2N": nx2, "x1R": mx1, "x2R": mx2, "ny": ny}
)
# Bare expression: shows the frame when run as a notebook cell.
df2
def euclidian(c1, c2):
    """Return the Euclidean (L2) distance between vectors *c1* and *c2*.

    Args:
        c1: Sized iterable of numeric coordinates.
        c2: Sized iterable of numeric coordinates, same length as *c1*.

    Returns:
        The square root of the summed squared coordinate differences, as a
        float (``0.0`` for two empty vectors).

    Raises:
        ValueError: If *c1* and *c2* have different lengths.
    """
    # Reject mismatched dimensions explicitly instead of silently ignoring
    # the extra coordinates of a longer c2.
    if len(c1) != len(c2):
        raise ValueError("c1 and c2 must have the same number of components")
    return math.sqrt(sum((b - a) ** 2 for a, b in zip(c1, c2)))
# Demo: Euclidean distance between (2, 1) and (3, 4).
v1, v2 = [2, 1], [3, 4]
print(euclidian(v1, v2))
def manhattan(c1, c2):
    """Return the Manhattan (L1) distance between vectors *c1* and *c2*.

    Args:
        c1: Sized iterable of numeric coordinates.
        c2: Sized iterable of numeric coordinates, same length as *c1*.

    Returns:
        The sum of the absolute coordinate differences (``0`` for two empty
        vectors).

    Raises:
        ValueError: If *c1* and *c2* have different lengths.
    """
    # Reject mismatched dimensions explicitly instead of silently ignoring
    # the extra coordinates of a longer c2.
    if len(c1) != len(c2):
        raise ValueError("c1 and c2 must have the same number of components")
    return sum(abs(a - b) for a, b in zip(c1, c2))
# Demo: Manhattan distance between (2, 3) and (1, 4).
v1, v2 = [2, 3], [1, 4]
print(manhattan(v1, v2))
def Chebyshev(c1, c2):
    """Return the Chebyshev (L-infinity) distance between *c1* and *c2*.

    Args:
        c1: Sized iterable of numeric coordinates.
        c2: Sized iterable of numeric coordinates, same length as *c1*.

    Returns:
        The largest absolute coordinate difference, or ``0`` when both
        vectors are empty.

    Raises:
        ValueError: If *c1* and *c2* have different lengths.
    """
    # Reject mismatched dimensions explicitly instead of silently ignoring
    # the extra coordinates of a longer c2.
    if len(c1) != len(c2):
        raise ValueError("c1 and c2 must have the same number of components")
    # default=0 gives empty vectors a distance of zero instead of letting
    # max() fail on an empty sequence.
    return max((abs(b - a) for a, b in zip(c1, c2)), default=0)
# Demo: Chebyshev distance between (2, 1) and (3, 4).
v1, v2 = [2, 1], [3, 4]
print(Chebyshev(v1, v2))
def Minkowski(c1, c2, p):
    """Return the Minkowski distance of order *p* between *c1* and *c2*.

    Computed as ``(sum(|a - b| ** p)) ** (1 / p)``. ``p = 1`` gives the
    Manhattan distance and ``p = 2`` the Euclidean distance. Fractional
    orders such as ``0.7`` are accepted.

    Args:
        c1: Sized iterable of numeric coordinates.
        c2: Sized iterable of numeric coordinates, same length as *c1*.
        p: Order of the distance. Positive infinity selects the limiting
            case, i.e. the largest absolute coordinate difference.

    Returns:
        The distance as a float.

    Raises:
        ValueError: If *c1* and *c2* have different lengths.
        ZeroDivisionError: If *p* is zero.
    """
    # Reject mismatched dimensions explicitly instead of silently ignoring
    # the extra coordinates of a longer c2.
    if len(c1) != len(c2):
        raise ValueError("c1 and c2 must have the same number of components")
    gaps = [abs(a - b) for a, b in zip(c1, c2)]
    # The general formula degenerates for p = inf (the outer exponent 1/p
    # becomes 0), so return the limit of the formula directly.
    if math.isinf(p) and p > 0:
        return float(max(gaps, default=0))
    return math.pow(sum(math.pow(gap, p) for gap in gaps), 1 / p)
# Demo: Minkowski distance of order 0.7 between (2, 1) and (3, 4).
v1, v2 = [2, 1], [3, 4]
print(Minkowski(v1, v2, 0.7))