"""
Importiamo anche la libreria seaborn
per graficare i dati
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Build a small 4-row demo table: three float32 columns and two int32
# columns filled with random integers, one categorical column and one
# constant string column.
# Each entry is (column label, exclusive upper bound, dtype).
_numeric_specs = [
    ("A", 10, "float32"),
    ("B", 100, "float32"),
    ("C", 2000, "float32"),
    ("D", 2000, "int32"),
    ("E", 300, "int32"),
]
_columns = {
    label: pd.Series(np.random.randint(upper, size=4), dtype=kind)
    for label, upper, kind in _numeric_specs
}
_columns["F"] = pd.Categorical(["test", "train", "test", "train"])
_columns["G"] = "foo"  # scalar value, repeated on every row
df = pd.DataFrame(_columns)

df

"""
Riassunto delle variabili statistiche
relativi alla tabella: deviazione standard, media
minimo, massimo, eccetera
"""
df.describe()

"""
Cambiamo l'intestazione (header)
"""
df

"""
Ora importiamo nuovamente la tabella usata nella lezione 1
e visualizziamo le prime 5 righe
"""

url="https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
df=pd.read_csv(url,header=None)
headers=["simbolo","norm-loss","marca","carburante","asp","n_di_porte","stile","trazione","lo_motore","ruote","lunghezza","larghezza","altezza","freno","tipo_motore","n_cicli","forma_motore","iniezione","bore","stroke","comp-ratio","horsepower","peak-rpm","city-mpg","high-mpg","prezzo"]
df.columns=headers
df.head(5)

"""
Ora contiamo quanti oggetti differenti ci sono nella
colonna "trazione"
"""
conteggio_trazione=df["trazione"].value_counts()

conteggio_trazione

# Recorded notebook output:
# fwd    120
# rwd     76
# 4wd      9
# Name: trazione, dtype: int64

"""
Cambiamo l'etichetta "conteggio" in valori_contati"
"""
conteggio_trazione.rename(columns={"conteggio":"valori_contati"})
conteggio_trazione.index.name="conteggio"

conteggio_trazione

# Recorded notebook output:
# conteggio
# fwd    120
# rwd     76
# 4wd      9
# Name: trazione, dtype: int64

"""
Per fare un grafico 
"""
y=df["marca"]
x=df["prezzo"]

plt.scatter(x,y)
plt.title("Scatterplot of Model vs Price")
plt.xlabel("Prezzo")
plt.ylabel("Marca")

# Recorded notebook output: Text(0, 0.5, 'Marca')

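# Recorded output of `df` for the demo table (values are random, so they
# differ on every run):
# 	A	B	C	D	E	F	G
# 0	0.0	50.0	126.0	1714	219	test	foo
# 1	9.0	72.0	1185.0	1136	273	train	foo
# 2	3.0	42.0	1825.0	1369	159	test	foo
# 3	0.0	96.0	2.0	391	202	train	foo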

# Recorded output of `df.describe()` for the demo table:
# 	A	B	C	D	E
# count	4.00000	4.000000	4.000000	4.000000	4.000000
# mean	3.00000	65.000000	784.500000	1152.500000	213.250000
# std	4.24264	24.248711	873.492798	560.448927	47.161955
# min	0.00000	42.000000	2.000000	391.000000	159.000000
# 25%	0.00000	48.000000	95.000000	949.750000	191.250000
# 50%	1.50000	61.000000	655.500000	1252.500000	210.500000
# 75%	4.50000	78.000000	1345.000000	1455.250000	232.500000
# max	9.00000	96.000000	1825.000000	1714.000000	273.000000

# Recorded output of the second `df` display of the demo table:
# 	A	B	C	D	E	F	G
# 0	0.0	50.0	126.0	1714	219	test	foo
# 1	9.0	72.0	1185.0	1136	273	train	foo
# 2	3.0	42.0	1825.0	1369	159	test	foo
# 3	0.0	96.0	2.0	391	202	train	foo

# Recorded output of `df.head(5)` for the automobile table (the "..." column
# marks columns elided by the display):
# 	simbolo	norm-loss	marca	carburante	asp	n_di_porte	stile	trazione	lo_motore	ruote	...	forma_motore	iniezione	bore	stroke	comp-ratio	horsepower	peak-rpm	city-mpg	high-mpg	prezzo
# 0	3	?	alfa-romero	gas	std	two	convertible	rwd	front	88.6	...	130	mpfi	3.47	2.68	9.0	111	5000	21	27	13495
# 1	3	?	alfa-romero	gas	std	two	convertible	rwd	front	88.6	...	130	mpfi	3.47	2.68	9.0	111	5000	21	27	16500
# 2	1	?	alfa-romero	gas	std	two	hatchback	rwd	front	94.5	...	152	mpfi	2.68	3.47	9.0	154	5000	19	26	16500
# 3	2	164	audi	gas	std	four	sedan	fwd	front	99.8	...	109	mpfi	3.19	3.40	10.0	102	5500	24	30	13950
# 4	2	164	audi	gas	std	four	sedan	4wd	front	99.4	...	136	mpfi	3.19	3.40	8.0	115	5500	18	22	17450