# Cleaning the Game/Score/Rating dataset
# --------------------------------------
# Problem found in the raw data: each value in the GameName column has the
# word "Review" (and anything after it) attached to the game title.

import pandas as pd

PREVIEW_ROWS = 3                   # rows shown in the before/after previews
OUTPUT_CSV = 'cleaned_games.csv'   # where the cleaned frame is written


def pick_csv_path():
    """Ask the user to choose the input CSV through a file-open dialog.

    Returns:
        The path selected in the dialog.

    Raises:
        FileNotFoundError: if the dialog is dismissed without a selection,
            instead of handing an empty path to ``pd.read_csv``.
    """
    # Imported lazily: the GUI toolkit is only needed when the dialog is
    # actually shown, so the cleaning helpers below stay importable on
    # machines without a display.
    from tkinter.filedialog import askopenfilename

    path = askopenfilename()
    if not path:
        raise FileNotFoundError('No CSV file was selected')
    return path


def strip_review_suffix(name):
    """Remove the first occurrence of 'Review' and everything after it.

    The whitespace that separated the title from 'Review' is removed as well.
    Values that are not strings (e.g. missing names) are returned unchanged
    rather than raising.
    """
    if not isinstance(name, str):
        return name
    return name.split('Review')[0].rstrip()


def clean_game_names(games, source_col='GameName', target_col='Name'):
    """Build a cleaned copy of ``games`` with review text stripped from names.

    Args:
        games: DataFrame holding the raw names in ``source_col``.
        source_col: column with the raw names; it is dropped from the result.
        target_col: column (appended last) that receives the cleaned names.

    Returns:
        A new DataFrame; ``games`` itself is not modified.
    """
    cleaned_names = [strip_review_suffix(raw) for raw in games[source_col]]
    result = games.drop(columns=[source_col])
    result[target_col] = cleaned_names
    return result


# In a notebook kernel __name__ is '__main__', so the steps below run as
# usual; importing this code elsewhere skips the dialog and the file I/O.
if __name__ == '__main__':
    # --- Load the game dataset ---
    filename = pick_csv_path()
    df1 = pd.read_csv(filename)

    # --- Unclean showcase ---
    unclean = df1
    print(unclean[['GameName']].head(PREVIEW_ROWS))

    # --- Cleaning: remove the word "Review" and anything after it ---
    clean = clean_game_names(df1)
    print(clean[['Name']].head(PREVIEW_ROWS))

    # --- CSV output ---
    # Export the cleaned frame (not the raw df1). The row index is written
    # as the first column, which is the pandas default.
    clean.to_csv(OUTPUT_CSV)