Refactored helper functions to return DataFrames instead of writing them to CSV files
This commit is contained in:
@@ -10,13 +10,16 @@ global games_merged_dat
|
||||
games_dat = pd.read_csv("Games.xls")
|
||||
games_sales_dat = pd.read_csv("vgsales-12-4-2019-short.csv")
|
||||
|
||||
games_merged_dat = games_dat.merge(games_sales_dat, left_on="Name", right_on="Name", how="left")
|
||||
games_merged_dat = games_dat.merge(
|
||||
games_sales_dat, left_on="Name", right_on="Name", how="left"
|
||||
)
|
||||
print(games_merged_dat)
|
||||
|
||||
games_merged_dat.to_csv("output_final_df.csv")
|
||||
|
||||
|
||||
# Defining useful Functions to be used later
|
||||
def slice_column(input_df, output_df, column, expression=" "):
|
||||
def slice_column(input_df, column, expression=" "):
|
||||
unclean = input_df[column].to_list()
|
||||
clean = list()
|
||||
for record in unclean:
|
||||
@@ -25,9 +28,9 @@ def slice_column(input_df, output_df, column, expression=" "):
|
||||
|
||||
input_df = input_df.drop(columns=[column])
|
||||
input_df[column] = clean
|
||||
input_df.to_csv(output_df)
|
||||
return input_df
|
||||
|
||||
|
||||
def write_joined_df(left, right, output_file, lsuf="new_key"):
|
||||
def write_joined_df(left, right, lsuf="new_key"):
|
||||
merged = left.join(right, lsuffix=lsuf)
|
||||
merged.to_csv(output_file)
|
||||
return merged
|
||||
|
||||
Reference in New Issue
Block a user