J'essaie de récupérer les données de Facebook, des pages publiques.
Le code que j'utilisais il y a quelques mois (10 mois peut-être) fonctionnait bien. Maintenant, lorsque j'ai voulu continuer ce projet, le code ne fonctionne plus. J'avais l'habitude d'utiliser mon jeton d'utilisateur privé, qui expire au bout de quelques minutes. Mais c'est suffisant pour mon cas d'utilisation. Je n'ai pas besoin d'une application, d'un examen d'application, etc. pour obtenir un jeton permanent.
Voici le code :
def getData(page, urlToConnect, startTime, filterStart, filterEnd):
posts = []
found = False
try:
while (True):
#print(url)
facebook_connection = urlopen(urlToConnect)
data = facebook_connection.read().decode('utf8')
json_object = json.loads(data)
#posts=json_object["data"]
allposts=json_object["data"]
allposts = np.asarray(allposts)
created = startTime
for i in range(0,100,1):
if (pd.to_datetime(allposts[i]['created_time']) > pd.to_datetime(created)):
posts.append(allposts[i])
else:
print(" found date at this index: ", i)
posts.append(allposts[i])
found = True
break;
if (i == 99):
urlToConnect = json_object["paging"]["next"]
if (found == True):
break;
df=pd.DataFrame(allposts)
df['Angry'] = df['Angry'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['Angry'] = df['Angry'].str.replace(',(.*?)}}','')
df['Haha'] = df['Haha'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['Haha'] = df['Haha'].str.replace('}}','')
df['Love'] = df['Love'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['Love'] = df['Love'].str.replace('}}','')
df['Sad'] = df['Sad'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['Sad'] = df['Sad'].str.replace(',(.*?)}}','')
df['Wow'] = df['Wow'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['Wow'] = df['Wow'].str.replace('}}','')
df['comments'] = df['comments'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
df['comments'] = df['comments'].str.replace(',(.*?)}}','')
df['likes'] = df['likes'].astype(str).str.replace('{\'(.*?)count\':','')
df['likes'] = df['likes'].str.replace(',(.*?)}}','')
df['shares'] = df['shares'].astype(str).str.replace('{\'count\': ','')
df['shares'] = df['shares'].str.replace('}','')
df['date'], df['time'] = df['created_time'].astype(str).str.split('T', 1).str
df['time'] = df['time'].str.replace('[+]0000','')
# Convert NaN's to 0 (as string)
df['shares'] = df['shares'].str.replace('nan','0')
df['shares'] = df['shares'].str.replace('Nan','0')
df['shares'] = df['shares'].str.replace('NaN','0')
# Convert Series values from str to int
df['shares'] = df['shares'].astype(int)
df['likes'] = df['likes'].astype(int)
df['comments'] = df['comments'].astype(int)
df['Love'] = df['Love'].astype(int)
df['Wow'] = df['Wow'].astype(int)
df['Sad'] = df['Sad'].astype(int)
df['Angry'] = df['Angry'].astype(int)
df['Haha'] = df['Haha'].astype(int)
# Sum over all number columns of one row
col_list= list(df)
df['total_reac'] = df[col_list].sum(axis=1)
# Sort values by 'total_reac' column, descending
df = df.sort_values(by='total_reac', ascending=False)
# Convert column from str to datetime
df['created_time'] = pd.to_datetime(df['created_time'])
# Filter for dates needed
df = df[(df['created_time'] > fStart) & (df['created_time'] <= fEnd)]
# Save Dataframe as csv
df.to_csv("Facebook_Posts_" + page + ".csv" )
except Exception as ex:
print (ex)
return df
token="my_User__Token_Here (got from my personal https://developers.facebook.com/tools/explorer)"
sTime = '2018-05-01'
fStart = '2018-05-01'
fEnd = '2018-05-29'
page_id="nytimes"
url="https://graph.facebook.com/3.2/"+page_id+"/posts/?fields=id,created_time,message,shares.summary(true).limit(0),comments.summary(true).limit(0),likes.summary(true),reactions.type(LOVE).limit(0).summary(total_count).as(Love),reactions.type(WOW).limit(0).summary(total_count).as(Wow),reactions.type(HAHA).limit(0).summary(total_count).as(Haha),reactions.type(SAD).limit(0).summary(1).as(Sad),reactions.type(ANGRY).limit(0).summary(1).as(Angry)&access_token="+token+"&limit=100"
dataNYT = getData(page_id, url, sTime, fStart, fEnd)
dataNYT.to_csv("NYT_posts.csv")
Voici l'erreur que j'obtiens maintenant :
HTTP Error 400: Bad Request
Et lorsque j'essaie de taper l'url demandée dans mon navigateur, cette erreur apparaît :
{
"error": {
"message": "Unknown path components: /nytimes/posts",
"type": "OAuthException",
"code": 2500,
"fbtrace_id": "HsN9zi+byTD"
}
}
Quelqu'un a une idée ?