Start TP2 big data

This commit is contained in:
flyingscorpio@clevo 2023-01-26 11:25:56 +01:00
parent df5e55bcf8
commit 2e8f4b2dcc
5 changed files with 4398 additions and 0 deletions

View file

@ -0,0 +1,75 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "8a515623",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e140d1c",
"metadata": {},
"outputs": [],
"source": [
"# Lecture du fichier\n",
"train = pd.read_csv('Spam/training.csv',delimiter=';')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b53d403d",
"metadata": {},
"outputs": [],
"source": [
"# Affichage d'informations sur le fichier\n",
"\n",
"#print(train.columns.values) \n",
"#print(train.describe())\n",
"#print(train.head())\n",
"#print(train.tail())\n",
"#print(train.info())\n",
"#print(train.isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e309da0",
"metadata": {},
"outputs": [],
"source": [
"# On renomme la colonne sur laquelle va porter la prédiction\n",
"train.rename(columns={\"GOAL-Spam\": \"GOAL_Spam\"}, inplace=True)\n",
"print(train.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

BIN
big-data/tp2/Spam/.DS_Store vendored Normal file

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

75
big-data/tp2/tp2.ipynb Normal file
View file

@ -0,0 +1,75 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "8a515623",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e140d1c",
"metadata": {},
"outputs": [],
"source": [
"# Lecture du fichier\n",
"train = pd.read_csv('Spam/training.csv',delimiter=';')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b53d403d",
"metadata": {},
"outputs": [],
"source": [
"# Affichage d'informations sur le fichier\n",
"\n",
"#print(train.columns.values) \n",
"#print(train.describe())\n",
"#print(train.head())\n",
"#print(train.tail())\n",
"#print(train.info())\n",
"#print(train.isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e309da0",
"metadata": {},
"outputs": [],
"source": [
"# On renomme la colonne sur laquelle va porter la prédiction\n",
"train.rename(columns={\"GOAL-Spam\": \"GOAL_Spam\"}, inplace=True)\n",
"print(train.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}