In [2]:
import numpy as np
import pandas as pd
In [7]:
"""
Costruiamo un dataframe sul quale lavorare
"""
df=pd.DataFrame({
    "A":pd.Series([2.,np.nan,-40,29]),
    "B":pd.Categorical([np.nan,"cane","gatto","topo"]),
    "C":np.random.randint(100,size=4),
    "D":np.random.randint(300,size=4),
    "E":pd.date_range("20190406",periods=4)
})
In [8]:
# Display the dataframe as the cell's output.
df
Out[8]:
A B C D E
0 2.0 NaN 43 198 2019-04-06
1 NaN cane 82 121 2019-04-07
2 -40.0 gatto 31 133 2019-04-08
3 29.0 topo 40 87 2019-04-09
In [10]:
"""
Normalizzazione la colonna "D" usando il metodo
SIMPLE FEATURE SCALING:Xnew=Xold/Xmax
"""
df["D"]=df["D"]/df["D"].max()
In [11]:
# Display the dataframe to inspect column "D" after the scaling cell.
df
Out[11]:
A B C D E
0 2.0 NaN 43 1.000000 2019-04-06
1 NaN cane 82 0.611111 2019-04-07
2 -40.0 gatto 31 0.671717 2019-04-08
3 29.0 topo 40 0.439394 2019-04-09
In [12]:
"""
Normalizzazione la colonna "C" usando il metodo
MIN-MAX :Xnew=(Xold-Xmin)/(Xmax-Xmin)
"""
df["C"]=(df["C"].min())/(df["C"].max()-df["C"].min())
In [13]:
# Display the dataframe to inspect column "C" after the min-max cell.
df
Out[13]:
A B C D E
0 2.0 NaN 0.607843 1.000000 2019-04-06
1 NaN cane 0.607843 0.611111 2019-04-07
2 -40.0 gatto 0.607843 0.671717 2019-04-08
3 29.0 topo 0.607843 0.439394 2019-04-09
In [14]:
"""
Normalizzazione la colonna "A" usando il 
Z-METHOD :Xnew=(Xold-Xmin)/(Xmax-Xmin)
"""
df["A"]=(df["A"]-df["A"].mean())/df["A"].std()
In [15]:
# Display the dataframe to inspect column "A" after the standardization cell.
df
Out[15]:
A B C D E
0 0.143799 NaN 0.607843 1.000000 2019-04-06
1 NaN cane 0.607843 0.611111 2019-04-07
2 -1.064115 gatto 0.607843 0.671717 2019-04-08
3 0.920316 topo 0.607843 0.439394 2019-04-09
In [ ]: