Considérons l'ensemble de données suivant :
import pandas as pd
data = {"store_name":{"0":"StoreName","1":"StoreName","2":"StoreName","3":"StoreName","4":"StoreName",
"5":"StoreName","6":"StoreName","7":"StoreName","8":"StoreName","9":"StoreName",
"10":"StoreName","11":"StoreName","12":"StoreName","13":"StoreName","14":"StoreName",
"15":"StoreName","16":"StoreName","17":"StoreName","18":"StoreName","19":"StoreName",
"20":"StoreName","21":"StoreName","22":"StoreName","23":"StoreName","24":"StoreName",
"25":"StoreName","26":"StoreName","27":"StoreName","28":"StoreName","29":"StoreName",
"30":"StoreName","31":"StoreName","32":"StoreName","33":"StoreName","34":"StoreName",
"35":"StoreName","36":"StoreName","37":"StoreName","38":"StoreName","39":"StoreName",
"40":"StoreName","41":"StoreName","42":"StoreName","43":"StoreName","44":"StoreName",
"45":"StoreName","46":"StoreName","47":"StoreName","48":"StoreName","49":"StoreName"},
"category":{"0":"Facial Care","1":"Food","2":"Food","3":"Food","4":"Food","5":"Food",
"6":"Soap & Bath","7":"Soap & Bath","8":"Facial Care","9":"Condiments & Sauces",
"10":"Yoga & Home Fitness","11":"Yoga & Home Fitness","12":"Honey & Sweeteners",
"13":"Honey & Sweeteners","14":"Honey & Sweeteners","15":"Honey & Sweeteners",
"16":"Honey & Sweeteners","17":"Honey & Sweeteners","18":"Honey & Sweeteners",
"19":"Honey & Sweeteners","20":"Honey & Sweeteners","21":"Honey & Sweeteners",
"22":"Candies Desserts & Toppings","23":"Honey & Sweeteners","24":"Honey & Sweeteners",
"25":"Honey & Sweeteners","26":"Dog","27":"Dog","28":"Cat","29":"Cat",
"30":"Cooking & Meal Ingredients","31":"Snacks","32":"Snacks","33":"Cooking & Meal Ingredients",
"34":"Cooking & Meal Ingredients","35":"Cooking & Meal Ingredients","36":"Bars, Cereals & Granolas",
"37":"Bars, Cereals & Granolas","38":"Candies Desserts & Toppings","39":"Cooking & Meal Ingredients",
"40":"Cooking & Meal Ingredients","41":"Snacks","42":"Snacks","43":"Cooking & Meal Ingredients",
"44":"Cooking & Meal Ingredients","45":"Cooking & Meal Ingredients","46":"Cooking & Meal Ingredients",
"47":"Snacks","48":"Cooking & Meal Ingredients","49":"Sun & Bug"},
"brand":{"0":"Brand1","1":"Brand2","2":"Brand2","3":"Brand2","4":"Brand2","5":"Brand2",
"6":"Brand3","7":"Brand3","8":"Brand3","9":"Brand4","10":"Brand5","11":"Brand5",
"12":"ƒBrand6","13":"Brand6","14":"Brand6","15":"Brand6","16":"Brand6","17":"Brand6",
"18":"Brand6","19":"Brand6","20":"Brand6","21":"Brand6","22":"Brand6","23":"Brand6",
"24":"Brand6","25":"Brand6","26":"Zuke\'s","27":"Zuke\'s","28":"Zuke\'s","29":"Zuke\'s",
"30":"Brand8","31":"Brand8","32":"Brand8","33":"Brand8","34":"Brand8","35":"Brand8",
"36":"Brand8","37":"Brand8","38":"Brand8","39":"Brand8","40":"Brand8","41":"Brand8",
"42":"Brand8","43":"Brand8","44":"Brand8","45":"Brand8","46":"Brand8","47":"Brand8",
"48":"Brand8","49":"Brand7"},
"store_price":{"0":4.49,"1":14.45,"2":13.49,"3":14.29,"4":13.99,"5":13.99,"6":2.65,"7":3.45,
"8":3.95,"9":3.75,"10":3.65,"11":6.95,"12":10.75,"13":10.75,"14":4.65,
"15":5.69,"16":3.95,"17":6.45,"18":3.45,"19":4.95,"20":4.45,"21":4.45,
"22":3.79,"23":4.95,"24":7.45,"25":7.49,"26":4.99,"27":4.99,"28":2.29,
"29":2.95,"30":1.89,"31":3.25,"32":3.25,"33":2.99,"34":2.99,"35":2.99,
"36":5.25,"37":5.25,"38":3.25,"39":2.25,"40":2.89,"41":4.25,"42":4.25,
"43":2.25,"44":2.05,"45":1.89,"46":2.49,"47":4.25,"48":2.49,"49":4.95},
"Comp1":{"0": None,"1":15.9,"2":13.7,"3":15.9,"4":14.59,"5":13.99,"6": None,"7": None,"8": None,"9": None,
"10":10.0,"11":20.0,"12":19.69,"13":20.8,"14": None,"15": None,"16": None,"17":6.18,"18": None,"19": None,
"20": None,"21": None,"22":5.99,"23": None,"24": None,"25":18.99,"26":6.39,"27":6.39,"28": None,"29": None,
"30": None,"31": None,"32": None,"33": None,"34": None,"35": None,"36": None,"37": None,"38": None,"39": None,"40": None,
"41": None,"42": None,"43": None,"44": None,"45": None,"46": None,"47": None,"48": None,"49":7.19},
"Comp5":{"0":6.72,"1": None,"2": None,"3": None,"4": None,"5": None,"6": None,"7": None,"8":5.79,"9": None,
"10": None,"11": None,"12":10.55,"13":11.4,"14": None,"15":8.27,"16":5.01,"17": None,"18": None,
"19":4.71,"20": None,"21":5.49,"22": None,"23": None,"24": None,"25": None,"26": None,"27":6.46,
"28": None,"29": None,"30": None,"31":3.94,"32": None,"33": None,"34": None,"35": None,"36":5.0,"37":5.0,
"38": None,"39": None,"40":4.31,"41":4.7,"42": None,"43": None,"44": None,"45": None,"46": None,"47":4.64,
"48": None,"49": None},
"Comp4":{"0":4.49,"1": None,"2": None,"3": None,"4": None,"5": None,"6":3.09,"7": None,"8":4.39,"9":4.59,
"10": None,"11": None,"12":10.79,"13":11.09,"14":5.13,"15":6.89,"16":4.39,"17":6.67,"18":3.59,
"19":5.21,"20":4.29,"21":4.99,"22":3.89,"23":5.39,"24": None,"25": None,"26":6.09,"27":6.09,
"28":2.89,"29": None,"30":2.19,"31":3.79,"32":3.32,"33":3.39,"34":3.39,"35":3.39,"36":6.29,
"37":6.29,"38":3.99,"39":2.89,"40":3.59,"41":4.99,"42":4.69,"43":2.89,"44":2.59,"45":2.19,
"46":2.99,"47":4.99,"48":2.99,"49": None},
"Comp2":{"0":4.77,"1":13.66,"2": None,"3":11.38,"4":14.59,"5": None,"6": None,"7":4.99,"8":4.99,"9": None,"10": None,
"11": None,"12":9.845,"13":13.12,"14": None,"15":11.23,"16":4.67,"17": None,"18":3.82,"19":3.88,"20":3.48,
"21":5.7,"22": None, "23":7.69,"24":8.18,"25": None,"26":8.99,"27": None,"28":1.95,"29":4.87,"30":1.72,
"31":2.69,"32":2.82,"33": None,"34": None,"35":2.43,"36":3.98,"37":3.98,"38":3.28,"39": None,"40": None,
"41":4.39,"42":3.99,"43":1.97,"44": None,"45":1.72,"46":2.49,"47": None,"48":2.48,"49":6.39},
"Comp3":{"0":6.6833333333,"1": None,"2": None,"3": None,"4": None,"5": None,"6":3.79,"7":4.12,"8":5.46,
"9": None,"10": None,"11": None,"12":13.57,"13":14.1233333333,"14":4.52,"15":10.035,"16":5.2066666667,
"17":6.23,"18":4.095,"19":6.4733333333,"20":5.3866666667,"21":5.025,"22":4.41,"23":7.525,
"24":8.38,"25": None,"26":7.42,"27":6.85,"28":3.21,"29": None,"30":2.3733333333,"31": None,"32":4.315,
"33": None,"34": None,"35": None,"36": None,"37": None,"38":4.7133333333,"39":3.495,"40":3.8833333333,
"41":5.73,"42":5.73,"43":3.495,"44": None,"45":2.575,"46":3.495,"47":5.79,"48":3.47,"49":8.21}}
df = pd.DataFrame(data)
Pour chaque marque et chaque comp[n], je cherche à obtenir le prix moyen en magasin par rapport au prix de la comp[n] lorsqu'il y a un prix pour cette comp[n] et un prix en magasin. J'avais essayé quelque chose comme :
brand = df.groupby('brand')['store_price','Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5'].mean()
for comp in ['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']:
brand[comp] = 1.0 * (brand['store_price']/brand[comp])
Cela n'a manifestement pas fonctionné car la moyenne de chaque Comp[n] a été comparée à la moyenne de l'ensemble de la marque pour StoreName. Il doit strictement s'agir d'un rapport dollar par dollar entre les articles dont le prix pour comp[n] et StoreName est reflété dans store_price.
Je me disais que je devrais peut-être faire quelque chose comme.. :
for i in df.index:
for comp in ['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5']:
df.loc[i, comp] = 1.0*(df.loc[i, 'store_price']/df.loc[i,comp])
brand = df.groupby('brand')['Comp1', 'Comp2', 'Comp3', 'Comp4', 'Comp5'].mean().reset_index()
Cependant, je pense qu'il doit y avoir une manière plus intelligente de découper les données, d'effectuer les calculs, puis de recoller ces tranches sur le cadre de données.