Refactored helper functions to return DataFrames instead of writing them to CSV files
This commit is contained in:
@@ -10,13 +10,16 @@ global games_merged_dat
|
|||||||
games_dat = pd.read_csv("Games.xls")
|
games_dat = pd.read_csv("Games.xls")
|
||||||
games_sales_dat = pd.read_csv("vgsales-12-4-2019-short.csv")
|
games_sales_dat = pd.read_csv("vgsales-12-4-2019-short.csv")
|
||||||
|
|
||||||
games_merged_dat = games_dat.merge(games_sales_dat, left_on="Name", right_on="Name", how="left")
|
games_merged_dat = games_dat.merge(
|
||||||
|
games_sales_dat, left_on="Name", right_on="Name", how="left"
|
||||||
|
)
|
||||||
print(games_merged_dat)
|
print(games_merged_dat)
|
||||||
|
|
||||||
games_merged_dat.to_csv("output_final_df.csv")
|
games_merged_dat.to_csv("output_final_df.csv")
|
||||||
|
|
||||||
|
|
||||||
# Defining useful Functions to be used later
|
# Defining useful Functions to be used later
|
||||||
def slice_column(input_df, output_df, column, expression=" "):
|
def slice_column(input_df, column, expression=" "):
|
||||||
unclean = input_df[column].to_list()
|
unclean = input_df[column].to_list()
|
||||||
clean = list()
|
clean = list()
|
||||||
for record in unclean:
|
for record in unclean:
|
||||||
@@ -25,9 +28,9 @@ def slice_column(input_df, output_df, column, expression=" "):
|
|||||||
|
|
||||||
input_df = input_df.drop(columns=[column])
|
input_df = input_df.drop(columns=[column])
|
||||||
input_df[column] = clean
|
input_df[column] = clean
|
||||||
input_df.to_csv(output_df)
|
return input_df
|
||||||
|
|
||||||
|
|
||||||
def write_joined_df(left, right, output_file, lsuf="new_key"):
|
def write_joined_df(left, right, lsuf="new_key"):
|
||||||
merged = left.join(right, lsuffix=lsuf)
|
merged = left.join(right, lsuffix=lsuf)
|
||||||
merged.to_csv(output_file)
|
return merged
|
||||||
|
|||||||
Reference in New Issue
Block a user