In [1]:
import pandas as pan
import numpy as np
import plotly.express as px
# Region label inserted into the chart titles further down via `.format(pays)`.
pays = "en Wallonie-Bruxelles (Belgique)"

Wallonie-Bruxelles (Belgique)

mots seuls

In [2]:
# Load the (word, interactions) pairs; `names` supplies the column labels, so
# the first line of each file is read as data.
# Only an empty field counts as missing. With pandas' default NA list, real
# words such as "null", "nan" or "na" are parsed as NaN and then dropped by the
# pivot tables that group on `mot` -- presumably the cause of the gap between
# the CSV row counts and the observed sums printed later in the notebook.
CSV_READ_OPTIONS = dict(
    names=["mot", "interactions"],
    low_memory=False,
    keep_default_na=False,
    na_values=[""],
)
media1 = pan.read_csv("belgique-motsSeuls-media-nettoye.csv", **CSV_READ_OPTIONS)
nonmedia1 = pan.read_csv("belgique-motsSeuls-nonmedia-nettoye.csv", **CSV_READ_OPTIONS)
In [3]:
# Show the media frame as loaded (columns: mot, interactions).
media1
Out[3]:
mot interactions
0 vie 175299
1 bienvenue 175299
2 émilie 175299
3 dylan 175299
4 faire 175299
... ... ...
8426089 timon 7
8426090 wellenreuther 7
8426091 club 7
8426092 bruxellois 7
8426093 renforcer 7

8426094 rows × 2 columns

In [4]:
# Show the non-media frame as loaded (columns: mot, interactions).
nonmedia1
Out[4]:
mot interactions
0 moment 224233
1 tendresse 224233
2 bébé 224233
3 bébé 224233
4 apprenon 224233
... ... ...
23350240 ici 5
23350241 visitgaume 5
23350242 soleildegaume 5
23350243 voter 5
23350244 logo 5

23350245 rows × 2 columns

In [5]:
def _count_and_sum_by_word(words):
    """Aggregate a (mot, interactions) frame per distinct word.

    Returns a frame indexed by `mot` with two columns: ("len", "interactions"),
    the number of rows for the word, and ("sum", "interactions"), the total of
    its interactions.
    """
    # "sum" replaces np.sum (passing NumPy callables to agg is deprecated in
    # recent pandas); `mot` is the index, so it no longer appears in `values`.
    return pan.pivot_table(
        words,
        index=["mot"],
        values=["interactions"],
        aggfunc=[len, "sum"],
    )


media1_table = _count_and_sum_by_word(media1)
nonmedia1_table = _count_and_sum_by_word(nonmedia1)
In [6]:
# Inspect the per-word aggregation for media posts (row count and summed interactions).
media1_table
Out[6]:
len sum
interactions interactions
mot
#106cabrel 1 70
#balades 1 45
#breath4life 1 9
#cestbienlecas 1 422
#confinemer 1 49
... ... ...
그ー 1 34
그切 1 36
나4 1 5
무를 1 414
제이행 1 2645

128697 rows × 2 columns

In [7]:
# Inspect the per-word aggregation for non-media posts (row count and summed interactions).
nonmedia1_table
Out[7]:
len sum
interactions interactions
mot
#1 1 79
#2 3 24
#2020toussolidaire 1 826
#21juillet 1 693
#25 1 56
... ... ...
single 2 85
solidaire 1 12
watch 1 13
𐀁мвя 1 12
𐌾ᗩïᗩᙅ 1 11

471368 rows × 2 columns

In [8]:
# Tag every row with the source it comes from, so the two tables remain
# distinguishable after they are stacked together.
media1_table = media1_table.assign(type="media")
nonmedia1_table = nonmedia1_table.assign(type="non-media")
In [9]:
# Check that the `type` column was added to the media table.
media1_table
Out[9]:
len sum type
interactions interactions
mot
#106cabrel 1 70 media
#balades 1 45 media
#breath4life 1 9 media
#cestbienlecas 1 422 media
#confinemer 1 49 media
... ... ... ...
그ー 1 34 media
그切 1 36 media
나4 1 5 media
무를 1 414 media
제이행 1 2645 media

128697 rows × 3 columns

In [10]:
# Check that the `type` column was added to the non-media table.
nonmedia1_table
Out[10]:
len sum type
interactions interactions
mot
#1 1 79 non-media
#2 3 24 non-media
#2020toussolidaire 1 826 non-media
#21juillet 1 693 non-media
#25 1 56 non-media
... ... ... ...
single 2 85 non-media
solidaire 1 12 non-media
watch 1 13 non-media
𐀁мвя 1 12 non-media
𐌾ᗩïᗩᙅ 1 11 non-media

471368 rows × 3 columns

In [11]:
# Stack the media and non-media per-word tables row-wise; the index name they
# share ("mot") carries over to the result.
# `names=` and `levels=` were removed: concat only uses them together with
# `keys=`, so they had no effect here. The columns are renamed in a later cell.
tableau1 = pan.concat([media1_table, nonmedia1_table])
In [12]:
# Inspect the stacked media + non-media table.
tableau1
Out[12]:
len sum type
interactions interactions
mot
#106cabrel 1 70 media
#balades 1 45 media
#breath4life 1 9 media
#cestbienlecas 1 422 media
#confinemer 1 49 media
... ... ... ...
single 2 85 non-media
solidaire 1 12 non-media
watch 1 13 non-media
𐀁мвя 1 12 non-media
𐌾ᗩïᗩᙅ 1 11 non-media

600065 rows × 3 columns

In [13]:
# Flatten the stacked table: move the word index into a regular `mot` column,
# give the columns flat names and number the rows from 0.
# The guard makes the cell safe to re-run: once the columns are flat nothing is
# done. The earlier in-place version replaced `mot` with row numbers when the
# cell was executed a second time.
if isinstance(tableau1.columns, pan.MultiIndex):
    tableau1 = tableau1.reset_index()  # the word index becomes the first column
    tableau1.columns = ["mot", "nb", "interactions", "media"]
    # Keep the column order used by the rest of the notebook.
    tableau1 = tableau1[["nb", "interactions", "media", "mot"]]
tableau1
Out[13]:
nb interactions media mot
0 1 70 media #106cabrel
1 1 45 media #balades
2 1 9 media #breath4life
3 1 422 media #cestbienlecas
4 1 49 media #confinemer
... ... ... ... ...
600060 2 85 non-media single
600061 1 12 non-media solidaire
600062 1 13 non-media watch
600063 1 12 non-media 𐀁мвя
600064 1 11 non-media 𐌾ᗩïᗩᙅ

600065 rows × 4 columns

In [14]:
# Cross-tabulate per word (rows) and source type (the `media` column).
# `nb` is now requested explicitly. The previous `values=["mot","interactions"]`
# did not list it, yet the result contained the `nb` aggregates that the
# following cell relies on (apparently because the redundant "mot" entry kept
# pivot_table from restricting the frame to `values`).
# "sum" replaces np.sum, which recent pandas deprecates as an aggregator.
# The `len` aggregates are kept so the 8-column layout stays unchanged.
khi2_1 = pan.pivot_table(
    tableau1,
    index=["mot"],
    columns=["media"],
    values=["interactions", "nb"],
    aggfunc=[len, "sum"],
    fill_value=0,
)
In [15]:
# Inspect the cross-tabulated table (one row per word, columns split by media / non-media).
khi2_1
Out[15]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
mot
#1 0 1 0 1 0 79 0 1
#106cabrel 1 0 1 0 70 0 1 0
#2 0 1 0 1 0 24 0 3
#2020toussolidaire 0 1 0 1 0 826 0 1
#21juillet 0 1 0 1 0 693 0 1
... ... ... ... ... ... ... ... ...
single 0 1 0 1 0 85 0 2
solidaire 0 1 0 1 0 12 0 1
watch 0 1 0 1 0 13 0 1
𐀁мвя 0 1 0 1 0 12 0 1
𐌾ᗩïᗩᙅ 0 1 0 1 0 11 0 1

506548 rows × 8 columns

In [16]:
# Reduce the pivot to the four summed columns plus the word itself.
# Columns are picked by their (aggregation, value, source) labels rather than by
# position, so a change in the pivot's column order cannot mislabel them, and
# the unused `len` aggregates are simply not selected.
# The guard makes the cell safe to re-run: once the frame is flat nothing is done.
if isinstance(khi2_1.columns, pan.MultiIndex):
    khi2_flat = pan.DataFrame(
        {
            "interactions_media": khi2_1[("sum", "interactions", "media")],
            "interactions_nonmedia": khi2_1[("sum", "interactions", "non-media")],
            "obs_media": khi2_1[("sum", "nb", "media")],
            "obs_nonmedia": khi2_1[("sum", "nb", "non-media")],
        }
    )
    khi2_flat["mot"] = khi2_flat.index  # keep the word as a regular column
    khi2_1 = khi2_flat.reset_index(drop=True)
khi2_1
Out[16]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot
0 0 79 0 1 #1
1 70 0 1 0 #106cabrel
2 0 24 0 3 #2
3 0 826 0 1 #2020toussolidaire
4 0 693 0 1 #21juillet
... ... ... ... ... ...
506543 0 85 0 2 single
506544 0 12 0 1 solidaire
506545 0 13 0 1 watch
506546 0 12 0 1 𐀁мвя
506547 0 11 0 1 𐌾ᗩïᗩᙅ

506548 rows × 5 columns

In [17]:
# Column totals of the observed counts (media, non-media).
khi2_1.obs_media.sum(), khi2_1.obs_nonmedia.sum()
Out[17]:
(8426093, 23350157)
In [18]:
# Sanity check: number of rows read from each CSV next to the total of the
# observed counts after pivoting.
# NOTE(review): a gap between the two figures means rows were lost on the way,
# presumably rows whose `mot` was read as missing -- confirm.
print("Nb de lignes fichier media1 = ", len(media1))
print("Somme observée média khi2_1 = ", khi2_1["obs_media"].sum())

print("Nb de lignes  fichier nonmedia1 = ", len(nonmedia1))
print("Somme observée nonmédia khi2_1 = ", khi2_1["obs_nonmedia"].sum())
Nb de lignes fichier media1 =  8426094
Somme observée média khi2_1 =  8426093
Nb de lignes  fichier nonmedia1 =  23350245
Somme observée nonmédia khi2_1 =  23350157
In [19]:
# Expected counts per word and source: row total * column total / grand total.
total_obs_media = khi2_1["obs_media"].sum()
total_obs_nonmedia = khi2_1["obs_nonmedia"].sum()
word_obs_total = khi2_1["obs_media"] + khi2_1["obs_nonmedia"]

khi2_1["exp_media"] = (word_obs_total * total_obs_media) / (total_obs_media + total_obs_nonmedia)
khi2_1["exp_nonmedia"] = (word_obs_total * total_obs_nonmedia) / (total_obs_media + total_obs_nonmedia)
khi2_1
Out[19]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia
0 0 79 0 1 #1 0.265170 0.734830
1 70 0 1 0 #106cabrel 0.265170 0.734830
2 0 24 0 3 #2 0.795509 2.204491
3 0 826 0 1 #2020toussolidaire 0.265170 0.734830
4 0 693 0 1 #21juillet 0.265170 0.734830
... ... ... ... ... ... ... ...
506543 0 85 0 2 single 0.530339 1.469661
506544 0 12 0 1 solidaire 0.265170 0.734830
506545 0 13 0 1 watch 0.265170 0.734830
506546 0 12 0 1 𐀁мвя 0.265170 0.734830
506547 0 11 0 1 𐌾ᗩïᗩᙅ 0.265170 0.734830

506548 rows × 7 columns

In [20]:
# Pearson residuals on the counts: (observed - expected) / sqrt(expected).
khi2_1["res_media"] = (
    khi2_1["obs_media"].sub(khi2_1["exp_media"]).div(np.sqrt(khi2_1["exp_media"]))
)
khi2_1["res_nonmedia"] = (
    khi2_1["obs_nonmedia"].sub(khi2_1["exp_nonmedia"]).div(np.sqrt(khi2_1["exp_nonmedia"]))
)
khi2_1
Out[20]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
0 0 79 0 1 #1 0.265170 0.734830 -0.514946 0.309336
1 70 0 1 0 #106cabrel 0.265170 0.734830 1.427005 -0.857223
2 0 24 0 3 #2 0.795509 2.204491 -0.891913 0.535785
3 0 826 0 1 #2020toussolidaire 0.265170 0.734830 -0.514946 0.309336
4 0 693 0 1 #21juillet 0.265170 0.734830 -0.514946 0.309336
... ... ... ... ... ... ... ... ... ...
506543 0 85 0 2 single 0.530339 1.469661 -0.728244 0.437467
506544 0 12 0 1 solidaire 0.265170 0.734830 -0.514946 0.309336
506545 0 13 0 1 watch 0.265170 0.734830 -0.514946 0.309336
506546 0 12 0 1 𐀁мвя 0.265170 0.734830 -0.514946 0.309336
506547 0 11 0 1 𐌾ᗩïᗩᙅ 0.265170 0.734830 -0.514946 0.309336

506548 rows × 9 columns

In [21]:
# Keep the 50 words with the highest media residuals for the chart.
graph_media1 = khi2_1.sort_values("res_media", ascending=False).iloc[:50]
graph_media1
Out[21]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
132650 19325018 14609115 65421 40483 coronavirus 28082.512980 77821.487020 222.812080 -133.846480
85116 11838300 9946661 39230 36723 belgique 20140.420648 55812.579352 134.512328 -80.803526
84949 7968684 6626220 29100 25891 belge 14581.937144 40409.062856 120.226737 -72.221962
61895 4996615 6088391 21922 19685 annoncer 11032.908271 30574.091729 103.668461 -62.275163
445838 2212544 1126940 10607 4776 trump 4079.102746 11303.897254 102.209435 -61.398705
60496 1647774 504751 8958 3632 anderlecht 3338.484273 9251.515727 97.257751 -58.424156
151790 3824512 613719 7715 2487 diable 2705.259456 7496.740544 96.318735 -57.860075
419647 1562305 947363 9051 4036 standard 3470.273525 9616.726475 94.734656 -56.908496
90053 1594174 509468 6908 2791 bilan 2571.879187 7127.120813 85.501964 -51.362282
134732 5833072 7996479 24467 29475 covid-19 14303.774316 39638.225684 84.977949 -51.047499
122413 2559615 4069796 17655 18057 club 9469.733943 26242.266057 84.113153 -50.528003
270878 2243930 1023084 8705 5258 league 3702.562025 10260.437975 82.211116 -49.385421
157498 1427126 616647 6287 2621 donald 2362.130095 6545.869905 80.755732 -48.511151
163866 2163216 1355137 8557 5302 décès 3674.984395 10184.015605 80.532543 -48.377078
117186 1866221 1391811 7945 4551 chiffre 3313.558338 9182.441662 80.457936 -48.332261
197835 1210570 1153861 8066 4841 football 3422.543011 9484.456989 79.371935 -47.679884
59873 2115405 1717005 10378 8005 américain 4874.611309 13508.388691 78.824312 -47.350919
109634 4144422 5605739 19009 22028 cas 10881.761644 30155.238356 77.910034 -46.801698
325957 8847306 17145420 41255 65168 nouveau 28220.135961 78202.864039 77.593788 -46.611725
100987 764641 218762 4287 1065 bruges 1419.187278 3932.812722 76.125636 -45.729784
487072 2305015 1620938 8960 6472 épidémie 4092.096052 11339.903948 76.097272 -45.712746
282842 4801225 6327805 21076 26267 lundi 12553.920645 34789.079355 76.059924 -45.690310
130366 1246167 744332 5763 2563 contamination 2207.801434 6118.198566 75.663052 -45.451904
298109 3672515 4664767 17463 19992 mercredi 9931.924419 27523.075581 75.568413 -45.395053
103817 229005 85140 4261 1112 bx1 1424.755838 3948.244162 75.140381 -45.137927
59979 13791596 29344381 39789 64040 an 27532.286223 76296.713777 73.867416 -44.373239
341732 1797131 2704078 10524 9338 pandémie 5266.797031 14595.202969 72.440502 -43.516071
182234 1023335 526013 5174 2265 etats-unis 1972.596069 5466.403931 72.081098 -43.300171
223337 3107469 359102 3590 757 hazard 1152.691909 3194.308091 71.788323 -43.124297
251958 2522203 3467963 10756 9852 joueur 5464.613494 15143.386506 71.579681 -42.998963
323511 2340582 2677376 11777 11500 nombre 6172.350946 17104.649054 71.338359 -42.853997
114287 2222118 2288515 6824 4437 champion 2986.073979 8274.926021 70.233824 -42.190487
290644 3496090 4659470 16055 19062 mardi 9311.958078 25805.041922 69.877177 -41.976244
222806 821491 711923 4247 1593 hausse 1548.590004 4291.409996 68.570878 -41.191531
302676 4377445 5782810 17955 22984 ministre 10855.775031 30083.224969 68.136610 -40.930660
249970 3958967 4938957 15846 19397 jeudi 9345.369438 25897.630562 67.244574 -40.394801
366329 3230123 3949865 16141 20202 président 9637.055911 26705.944089 66.252857 -39.799062
441269 2201255 3044567 10611 10678 tour 5645.193938 15643.806062 66.092217 -39.702563
454506 1298407 1516450 6317 4304 vaccin 2816.365485 7804.634515 65.963283 -39.625110
163879 2690518 2578659 6311 4299 décéder 2813.448621 7796.551379 65.939342 -39.610729
149042 3922699 9062212 17206 22532 dernier 10537.306436 29200.693564 64.964478 -39.025114
232582 3917914 4788672 10125 10201 hôpital 5389.835689 14936.164311 64.498155 -38.744986
428823 2051059 3653799 9189 8711 t 4746.534431 13153.465569 64.481592 -38.735037
238792 844371 560880 3990 1660 infection 1498.207795 4151.792205 64.376268 -38.671767
459886 520436 441857 4741 2509 verviers 1922.479029 5327.520971 64.282161 -38.615235
389330 3887735 2284497 10170 10484 rouge 5476.811292 15177.188708 63.416771 -38.095384
237347 771795 691091 4411 2211 incendie 1755.952570 4866.047430 63.360106 -38.061344
457748 4694112 6621349 18947 26733 vendredi 12112.943731 33567.056269 62.094581 -37.301124
229085 704232 356016 3357 1191 hospitalisation 1205.990983 3342.009017 61.939857 -37.208180
308194 3048268 5221458 10679 11654 mort 5922.030918 16410.969082 61.815159 -37.133272
In [22]:
# Horizontal bar chart of the top media words; the y axis is reversed so the
# first row of the frame is drawn at the top.
fig = (
    px.bar(
        graph_media1,
        x="res_media",
        y="mot",
        orientation="h",
        labels={
            "res_media": "Résiduel de Pearson",
            "mot": "Lemme (dans les posts Facebook <b>médias</b>)",
        },
        title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays),
    )
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="red", opacity=0.75, textfont_size=12)
)
fig
In [23]:
# Keep the 50 words with the highest non-media residuals for the chart.
graph_nonmedia1 = khi2_1.sort_values("res_nonmedia", ascending=False).iloc[:50]
graph_nonmedia1
Out[23]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
358150 589111 19280588 2638 62212 post 17196.243454 47653.756546 -111.017690 66.689952
201650 450691 17394426 1252 53169 from 14430.790517 39990.209483 -109.706029 65.902018
350552 2568016 28083848 8714 85736 photo 25045.261283 69404.738717 -103.194593 61.990504
488547 2070626 22454212 7169 51108 être 15453.284191 42823.715809 -66.641474 40.032509
22779 2049763 12657714 12851 68829 2020 21659.046497 60020.953503 -59.849418 35.952422
239067 2026791 7751518 5481 39147 info 11833.985395 32794.014605 -58.399932 35.081695
75786 1652840 15207630 5267 37873 avoir 11439.413147 31700.586853 -57.710271 34.667405
465870 1536650 11700589 5609 38487 vous 11692.915210 32403.084790 -56.262853 33.797920
340701 416861 14410454 2132 23092 page 6688.636004 18535.363996 -55.715439 33.469080
435543 570475 9326705 2785 26021 the 7638.473229 21167.526771 -55.532773 33.359350
237026 419335 6675426 1569 19993 in 5717.585217 15844.414783 -54.864786 32.958081
154887 273251 5486574 1786 20071 disponible 5795.810226 16061.189774 -52.670415 31.639890
28573 89325 11357803 338 12178 3 3318.861728 9197.138272 -51.742481 31.082466
56512 5433 5089805 16 9989 allah 2653.021060 7351.978940 -51.196851 30.754698
330471 169051 3002291 2015 20258 of 5906.120747 16366.879253 -50.631821 30.415276
94785 3274888 34801436 14457 68346 bon 21956.831869 60846.168131 -50.613537 30.404293
152655 107371 7476766 386 10792 dieu 2964.064909 8213.935091 -47.353272 28.445804
277735 298537 3803846 1266 15040 lire 4323.854214 11982.145786 -46.503030 27.935052
170172 10470 1141065 36 8299 een 2210.187960 6124.812040 -46.246885 27.781181
188929 6160031 44825968 25377 102298 faire 33855.518627 93819.481373 -46.079233 27.680470
430897 8481 518139 47 8249 taquin 2199.846348 6096.153652 -45.900441 27.573067
276244 185924 3624725 1140 14187 lien 4064.253252 11262.746748 -45.869563 27.554518
87511 1067313 7980031 2818 21415 besoin 6425.853009 17807.146991 -45.007350 27.036574
343768 866330 10256637 3083 22249 partager 6717.274313 18614.725687 -44.342601 26.637250
437949 110254 2815200 755 11515 timeline 3253.630026 9016.369974 -43.804414 26.313953
244725 114856 2127421 1246 13931 is 4024.477824 11152.522176 -43.797787 26.309972
240595 33678 595149 405 9496 inscription 2625.443430 7275.556570 -43.334958 26.031944
233293 1293568 13928809 6202 33565 ici 10544.996352 29222.003648 -42.292781 25.405893
53548 341488 8451007 1034 12027 aimer 3463.379117 9597.620883 -41.280531 24.797820
54667 112061 2891901 567 9674 al 2715.601067 7525.398933 -41.230899 24.768005
225660 215204 4882694 815 10841 het 3090.815940 8565.184060 -40.935544 24.590581
60387 71716 2783388 484 9130 and 2549.339777 7064.660223 -40.905118 24.572303
192130 2175865 12615606 6712 34608 femme 10956.804619 30363.195381 -40.552324 24.360375
438995 68438 2418437 450 8799 to 2452.552902 6796.447098 -40.436616 24.290868
71463 70369 733185 696 10078 atelier 2856.936422 7917.063578 -40.428879 24.286220
130257 163471 2277940 680 9993 contacter 2830.154300 7842.845700 -40.417046 24.279112
349213 3116610 31943611 12426 54765 petit 17817.005303 49373.994697 -40.388005 24.261666
232509 206495 2553774 1049 11757 hésiter 3395.760889 9410.239111 -40.271730 24.191818
58920 1237266 14772671 2284 17228 ami 5173.987699 14338.012301 -40.177536 24.135234
334523 5822 4162843 66 6442 op 1725.723244 4782.276756 -39.953065 24.000391
163727 1436918 6975226 7932 38644 découvrir 12350.535622 34225.464378 -39.759001 23.883814
377224 125561 2227529 1132 11936 recette 3465.235304 9602.764696 -39.636215 23.810055
278196 335581 3535564 1437 12985 live 3824.274836 10597.725164 -38.603606 23.189751
176891 568284 5043973 2690 18287 ensemble 5562.461048 15414.538952 -38.514180 23.136032
461195 3212547 23340821 9320 42865 vie 13837.871467 38347.128533 -38.406013 23.071055
451567 24611 992869 139 6354 une 1721.745702 4771.254298 -38.144036 22.913681
59399 474552 12145865 1436 12786 amour 3771.240931 10450.759069 -38.026780 22.843244
393719 58761 930736 527 8297 réservation 2339.855856 6484.144144 -37.477337 22.513185
353526 220403 3155573 1132 11177 plaisir 3263.971637 9045.028363 -37.317130 22.416947
465257 3389 875844 48 5542 voor 1482.297624 4107.702376 -37.253885 22.378955
In [24]:
# Horizontal bar chart of the top non-media words; the y axis is reversed so
# the first row of the frame is drawn at the top.
fig = (
    px.bar(
        graph_nonmedia1,
        x="res_nonmedia",
        y="mot",
        orientation="h",
        labels={
            "res_nonmedia": "Résiduel de Pearson",
            "mot": "Lemme (dans les posts Facebook <b>non-médias</b>)",
        },
        title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays),
    )
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="darkorange", opacity=0.75, textfont_size=12)
)
fig
In [25]:
# Attempt to use the natural logarithm of the interactions; gives uninterpretable results.
# khi2_1["interactions_media_log"] = np.where(khi2_1.interactions_media == 0, 0, np.log(khi2_1.interactions_media+1e-12))
# khi2_1["interactions_nonmedia_log"] = np.where(khi2_1.interactions_nonmedia == 0, 0, np.log(khi2_1.interactions_nonmedia+1e-12))
# khi2_1
In [26]:
# Expected interaction totals per word and source: row total * column share.
# The column share is computed as a float first. The earlier form multiplied the
# integer per-word total by the integer column sum before dividing, and that
# product can silently wrap around on a larger corpus.
# (A log-scaled variant of these columns was tried and abandoned because its
# results were uninterpretable.)
total_inter_media = khi2_1["interactions_media"].sum()
total_inter_nonmedia = khi2_1["interactions_nonmedia"].sum()
total_inter = float(total_inter_media) + float(total_inter_nonmedia)
word_inter_total = (
    khi2_1["interactions_media"] + khi2_1["interactions_nonmedia"]
).astype("float64")

khi2_1["exp_inter_media"] = word_inter_total * (total_inter_media / total_inter)
khi2_1["exp_inter_nonmedia"] = word_inter_total * (total_inter_nonmedia / total_inter)
khi2_1
Out[26]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 79 0 1 #1 0.265170 0.734830 -0.514946 0.309336 15.968818 63.031182
1 70 0 1 0 #106cabrel 0.265170 0.734830 1.427005 -0.857223 14.149586 55.850414
2 0 24 0 3 #2 0.795509 2.204491 -0.891913 0.535785 4.851287 19.148713
3 0 826 0 1 #2020toussolidaire 0.265170 0.734830 -0.514946 0.309336 166.965111 659.034889
4 0 693 0 1 #21juillet 0.265170 0.734830 -0.514946 0.309336 140.080898 552.919102
... ... ... ... ... ... ... ... ... ... ... ...
506543 0 85 0 2 single 0.530339 1.469661 -0.728244 0.437467 17.181640 67.818360
506544 0 12 0 1 solidaire 0.265170 0.734830 -0.514946 0.309336 2.425643 9.574357
506545 0 13 0 1 watch 0.265170 0.734830 -0.514946 0.309336 2.627780 10.372220
506546 0 12 0 1 𐀁мвя 0.265170 0.734830 -0.514946 0.309336 2.425643 9.574357
506547 0 11 0 1 𐌾ᗩïᗩᙅ 0.265170 0.734830 -0.514946 0.309336 2.223506 8.776494

506548 rows × 11 columns

In [27]:
# Pearson-style residuals on the interaction totals:
# (observed - expected) / sqrt(expected).
# (The log-scaled variant of these columns was abandoned.)
khi2_1["res_inter_media"] = (
    khi2_1["interactions_media"]
    .sub(khi2_1["exp_inter_media"])
    .div(np.sqrt(khi2_1["exp_inter_media"]))
)
khi2_1["res_inter_nonmedia"] = (
    khi2_1["interactions_nonmedia"]
    .sub(khi2_1["exp_inter_nonmedia"])
    .div(np.sqrt(khi2_1["exp_inter_nonmedia"]))
)
khi2_1
Out[27]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 79 0 1 #1 0.265170 0.734830 -0.514946 0.309336 15.968818 63.031182 -3.996100 2.011384
1 70 0 1 0 #106cabrel 0.265170 0.734830 1.427005 -0.857223 14.149586 55.850414 14.847541 -7.473313
2 0 24 0 3 #2 0.795509 2.204491 -0.891913 0.535785 4.851287 19.148713 -2.202564 1.108631
3 0 826 0 1 #2020toussolidaire 0.265170 0.734830 -0.514946 0.309336 166.965111 659.034889 -12.921498 6.503865
4 0 693 0 1 #21juillet 0.265170 0.734830 -0.514946 0.309336 140.080898 552.919102 -11.835578 5.957281
... ... ... ... ... ... ... ... ... ... ... ... ... ...
506543 0 85 0 2 single 0.530339 1.469661 -0.728244 0.437467 17.181640 67.818360 -4.145074 2.086368
506544 0 12 0 1 solidaire 0.265170 0.734830 -0.514946 0.309336 2.425643 9.574357 -1.557448 0.783921
506545 0 13 0 1 watch 0.265170 0.734830 -0.514946 0.309336 2.627780 10.372220 -1.621043 0.815931
506546 0 12 0 1 𐀁мвя 0.265170 0.734830 -0.514946 0.309336 2.425643 9.574357 -1.557448 0.783921
506547 0 11 0 1 𐌾ᗩïᗩᙅ 0.265170 0.734830 -0.514946 0.309336 2.223506 8.776494 -1.491143 0.750547

506548 rows × 13 columns

In [28]:
# Keep the 50 words with the highest interaction-based media residuals.
graph_interactions_media1 = khi2_1.sort_values("res_inter_media", ascending=False).iloc[:50]
graph_interactions_media1
Out[28]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
132650 19325018 14609115 65421 40483 coronavirus 28082.512980 77821.487020 222.812080 -133.846480 6.859342e+06 2.707479e+07 4759.645684 -2395.704704
85116 11838300 9946661 39230 36723 belgique 20140.420648 55812.579352 134.512328 -80.803526 4.403545e+06 1.738142e+07 3542.953148 -1783.298608
151790 3824512 613719 7715 2487 diable 2705.259456 7496.740544 96.318735 -57.860075 8.971304e+05 3.541101e+06 3090.662217 -1555.643950
84949 7968684 6626220 29100 25891 belge 14581.937144 40409.062856 120.226737 -72.221962 2.950169e+06 1.164473e+07 2921.808490 -1470.653660
223337 3107469 359102 3590 757 hazard 1152.691909 3194.308091 71.788323 -43.124297 7.007220e+05 2.765849e+06 2875.130355 -1447.158838
285657 2778591 498829 3319 1262 madrid 1214.741577 3366.258423 60.374998 -36.268145 6.624876e+05 2.614932e+06 2599.848166 -1308.599189
169632 2568809 355647 3001 1179 eden 1108.408599 3071.591401 56.846917 -34.148776 5.911406e+05 2.333315e+06 2572.219887 -1294.692860
376721 2870626 635009 3426 1682 real 1354.485915 3753.514085 56.286033 -33.811845 7.086183e+05 2.797017e+06 2568.331287 -1292.735585
389330 3887735 2284497 10170 10484 rouge 5476.811292 15177.188708 63.416771 -38.095384 1.247636e+06 4.924596e+06 2363.612283 -1189.692981
282524 1843184 306054 2250 812 lukaku 811.949074 2250.050926 50.467277 -30.316432 4.344404e+05 1.714798e+06 2137.306780 -1075.785100
270878 2243930 1023084 8705 5258 league 3702.562025 10260.437975 82.211116 -49.385421 6.603842e+05 2.606630e+06 1948.643030 -980.823696
445838 2212544 1126940 10607 4776 trump 4079.102746 11303.897254 102.209435 -61.398705 6.750331e+05 2.664451e+06 1871.352869 -941.920716
101606 1594791 421083 1952 413 bruyne 627.125918 1737.874082 52.905062 -31.780846 4.074826e+05 1.608391e+06 1859.983144 -936.197916
61895 4996615 6088391 21922 19685 annoncer 11032.908271 30574.091729 103.668461 -62.275163 2.240689e+06 8.844317e+06 1841.097203 -926.691927
60496 1647774 504751 8958 3632 anderlecht 3338.484273 9251.515727 97.257751 -58.424156 4.351048e+05 1.717420e+06 1838.422810 -925.345806
134360 1191936 102166 1455 352 courtois 479.161325 1327.838675 44.579701 -26.779679 2.615858e+05 1.032516e+06 1819.027869 -915.583619
134732 5833072 7996479 24467 29475 covid-19 14303.774316 39638.225684 84.977949 -51.047499 2.795463e+06 1.103409e+07 1816.791029 -914.457736
90053 1594174 509468 6908 2791 bilan 2571.879187 7127.120813 85.501964 -51.362282 4.252238e+05 1.678418e+06 1792.616006 -902.289557
298826 5825958 8170334 22525 42177 mesure 17156.998365 47545.001635 40.981890 -24.618422 2.829168e+06 1.116712e+07 1781.669025 -896.779540
388272 1201758 170355 1448 339 romelu 473.857934 1313.142066 44.750534 -26.882301 2.773547e+05 1.094758e+06 1755.269058 -883.491465
356078 4406320 5342504 14062 17732 police 8430.799759 23363.200241 61.329099 -36.841288 1.970597e+06 7.778227e+06 1735.117397 -873.348393
163866 2163216 1355137 8557 5302 décès 3674.984395 10184.015605 80.532543 -48.377078 7.111891e+05 2.807164e+06 1721.797668 -866.644084
59979 13791596 29344381 39789 64040 an 27532.286223 76296.713777 73.867416 -44.373239 8.719374e+06 3.441660e+07 1717.732637 -864.598004
282842 4801225 6327805 21076 26267 lundi 12553.920645 34789.079355 76.059924 -45.690310 2.249588e+06 8.879442e+06 1701.247053 -856.300203
487072 2305015 1620938 8960 6472 épidémie 4092.096052 11339.903948 76.097272 -45.712746 7.935801e+05 3.132373e+06 1696.656995 -853.989859
103379 3235828 3218756 5938 8943 but 3945.987646 10935.012354 31.711309 -19.049448 1.304710e+06 5.149874e+06 1690.642870 -850.962728
298740 1432069 503902 1451 666 messi 561.363877 1555.636123 37.548291 -22.555809 3.913312e+05 1.544640e+06 1663.677117 -837.389873
101480 4841672 6706056 18777 31381 bruxelles 13300.372847 36857.627153 47.487714 -28.526566 2.334222e+06 9.213506e+06 1641.198467 -826.075542
232582 3917914 4788672 10125 10201 hôpital 5389.835689 14936.164311 64.498155 -38.744986 1.759923e+06 6.946663e+06 1626.682951 -818.769349
397846 4780848 6655026 17251 26179 samedi 11516.312308 31913.687692 53.438352 -32.101201 2.311613e+06 9.124261e+06 1624.070887 -817.454601
302676 4377445 5782810 17955 22984 ministre 10855.775031 30083.224969 68.136610 -40.930660 2.053763e+06 8.106492e+06 1621.442604 -816.131690
482163 970078 99946 795 261 zidane 280.019014 775.980986 30.774955 -18.486967 2.162914e+05 8.537326e+05 1620.797757 -815.807114
249970 3958967 4938957 15846 19397 jeudi 9345.369438 25897.630562 67.244574 -40.394801 1.798599e+06 7.099325e+06 1610.870123 -810.810171
132173 1729966 964487 391 507 cordy 238.122230 659.877770 9.907050 -5.951310 5.446485e+05 2.149805e+06 1606.114700 -808.416592
188310 4489604 6115559 14809 21250 face 9561.747767 26497.252233 53.661561 -32.235286 2.143695e+06 8.461468e+06 1602.247543 -806.470109
457748 4694112 6621349 18947 26733 vendredi 12112.943731 33567.056269 62.094581 -37.301124 2.287273e+06 9.028188e+06 1591.432191 -801.026345
157498 1427126 616647 6287 2621 donald 2362.130095 6545.869905 80.755732 -48.511151 4.131220e+05 1.630651e+06 1577.613056 -794.070666
163879 2690518 2578659 6311 4299 décéder 2813.448621 7796.551379 65.939342 -39.610729 1.065095e+06 4.204082e+06 1574.969272 -792.739952
89157 1111997 273703 3794 1663 biden 1447.030078 4009.969922 61.697667 -37.062692 2.801012e+05 1.105599e+06 1571.851471 -791.170649
325957 8847306 17145420 41255 65168 nouveau 28220.135961 78202.864039 77.593788 -46.611725 5.254090e+06 2.073864e+07 1567.597509 -789.029473
356138 2561931 2379873 5930 6846 policier 3387.805804 9388.194196 43.676665 -26.237212 9.989211e+05 3.942883e+06 1563.853699 -787.145076
61795 1740244 1082751 439 806 annie 330.136054 914.863946 5.991528 -3.599199 5.706316e+05 2.252363e+06 1548.331971 -779.332419
109634 4144422 5605739 19009 22028 cas 10881.761644 30155.238356 77.910034 -46.801698 1.970868e+06 7.779293e+06 1548.252401 -779.292369
298109 3672515 4664767 17463 19992 mercredi 9931.924419 27523.075581 75.568413 -45.395053 1.685273e+06 6.652009e+06 1530.790458 -770.503131
436217 859396 85572 1184 463 thibaut 436.734202 1210.265798 35.757447 -21.480022 1.910129e+05 7.539551e+05 1529.304772 -769.755330
59873 2115405 1717005 10378 8005 américain 4874.611309 13508.388691 78.824312 -47.350919 7.746716e+05 3.057738e+06 1523.293532 -766.729652
258479 1320595 570812 1999 1103 kevin 822.555855 2279.444145 41.019335 -24.640915 3.823232e+05 1.509084e+06 1517.446254 -763.786502
117186 1866221 1391811 7945 4551 chiffre 3313.558338 9182.441662 80.457936 -48.332261 6.585686e+05 2.599463e+06 1488.131915 -749.031517
419647 1562305 947363 9051 4036 standard 3470.273525 9616.726475 94.734656 -56.908496 5.072966e+05 2.002371e+06 1481.238317 -745.561716
366329 3230123 3949865 16141 20202 président 9637.055911 26705.944089 66.252857 -39.799062 1.451341e+06 5.728647e+06 1476.515729 -743.184664
In [29]:
# Horizontal bar chart of the top media words ranked by interaction-based
# residual; the y axis is reversed so the first row is drawn at the top.
fig = (
    px.bar(
        graph_interactions_media1,
        x="res_inter_media",
        y="mot",
        orientation="h",
        labels={
            "res_inter_media": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
            "mot": "Lemme (dans les posts Facebook <b>médias</b>)",
        },
        title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
    )
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="gold", opacity=0.75, textfont_size=12)
)
fig
In [30]:
# Keep the 50 words with the highest interaction-based non-media residuals.
graph_interactions_nonmedia1 = khi2_1.sort_values("res_inter_nonmedia", ascending=False).iloc[:50]
graph_interactions_nonmedia1
Out[30]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
358150 589111 19280588 2638 62212 post 17196.243454 47653.756546 -111.017690 66.689952 4.016400e+06 1.585330e+07 -1710.142326 860.777521
201650 450691 17394426 1252 53169 from 14430.790517 39990.209483 -109.706029 65.902018 3.607157e+06 1.423796e+07 -1661.952536 836.521828
94785 3274888 34801436 14457 68346 bon 21956.831869 60846.168131 -50.613537 30.404293 7.696632e+06 3.037969e+07 -1593.834420 802.235475
349213 3116610 31943611 12426 54765 petit 17817.005303 49373.994697 -40.388005 24.261666 7.086966e+06 2.797326e+07 -1491.417559 750.685303
340701 416861 14410454 2132 23092 page 6688.636004 18535.363996 -55.715439 33.469080 2.997148e+06 1.183017e+07 -1490.438034 750.192272
28573 89325 11357803 338 12178 3 3318.861728 9197.138272 -51.742481 31.082466 2.313887e+06 9.133241e+06 -1462.424593 736.092077
350552 2568016 28083848 8714 85736 photo 25045.261283 69404.738717 -103.194593 61.990504 6.195874e+06 2.445599e+07 -1457.467854 733.597168
59399 474552 12145865 1436 12786 amour 3771.240931 10450.759069 -38.026780 22.843244 2.551052e+06 1.006936e+07 -1300.086759 654.381475
488547 2070626 22454212 7169 51108 être 15453.284191 42823.715809 -66.641474 40.032509 4.957376e+06 1.956746e+07 -1296.531902 652.592185
188929 6160031 44825968 25377 102298 faire 33855.518627 93819.481373 -46.079233 27.680470 1.030615e+07 4.067985e+07 -1291.498314 650.058595
340959 359089 10552726 708 2920 pairi 962.035023 2665.964977 -8.190269 4.920014 2.205681e+06 8.706134e+06 -1243.367699 625.832687
480259 194471 9270260 662 6627 youtube 1932.820640 5356.179360 -28.906032 17.364277 1.913172e+06 7.551559e+06 -1242.576928 625.434663
433699 1921293 20189319 9523 41004 temps 13398.220401 37128.779599 -33.479029 20.111343 4.469371e+06 1.764124e+07 -1205.284479 606.664001
400404 1219960 15879803 6047 30383 savoir 9660.125660 26769.874340 -36.761369 22.083093 3.456494e+06 1.364327e+07 -1202.977669 605.502899
393678 8 7108753 1 680 résa 180.580444 500.419556 -13.363607 8.027714 1.436943e+06 5.671818e+06 -1198.718975 603.359342
152655 107371 7476766 386 10792 dieu 2964.064909 8213.935091 -47.353272 28.445804 1.533034e+06 6.051103e+06 -1151.439231 579.561708
457973 374426 9485232 2313 4866 vent 1903.651993 5275.348007 9.382075 -5.635950 1.993001e+06 7.866657e+06 -1146.513260 577.082286
53517 207929 8088620 622 8707 aime 2473.766464 6855.233536 -37.231186 22.365319 1.677039e+06 6.619510e+06 -1134.443145 571.006953
58920 1237266 14772671 2284 17228 ami 5173.987699 14338.012301 -40.177536 24.135234 3.236200e+06 1.277374e+07 -1111.170554 559.293002
53548 341488 8451007 1034 12027 aimer 3463.379117 9597.620883 -41.280531 24.797820 1.777288e+06 7.015207e+06 -1076.998373 542.092887
95118 59687 5794529 449 7101 bonjour 2002.029886 5547.970114 -34.709194 20.850322 1.183353e+06 4.670863e+06 -1032.952003 519.922729
241010 764384 10661017 1993 7370 instagram 2482.782228 6880.217772 -9.829552 5.904756 2.309496e+06 9.115905e+06 -1016.719792 511.752460
233293 1293568 13928809 6202 33565 ici 10544.996352 29222.003648 -42.292781 25.405893 3.077005e+06 1.214537e+07 -1016.701852 511.743431
413622 299181 7447541 1776 6351 soleil 2155.032699 5971.967301 -8.164879 4.904762 1.565899e+06 6.180823e+06 -1012.273808 509.514633
435543 570475 9326705 2785 26021 the 7638.473229 21167.526771 -55.532773 33.359350 2.000586e+06 7.896594e+06 -1011.092916 508.920247
56512 5433 5089805 16 9989 allah 2653.021060 7351.978940 -51.196851 30.754698 1.029936e+06 4.065302e+06 -1009.504069 508.120522
441641 493 4985044 11 199 touré 55.685599 154.314401 -5.988198 3.597198 1.007761e+06 3.977776e+06 -1003.381993 505.039055
141836 364401 7792773 710 2192 daiza 769.521951 2132.478049 -2.145688 1.288946 1.648866e+06 6.508308e+06 -1000.298527 503.487033
84707 1328658 13810089 5437 25223 bel 8130.097522 22529.902478 -29.867867 17.942065 3.060100e+06 1.207865e+07 -989.783322 498.194344
158727 0 4731876 0 90 dougourouni 23.865257 66.134743 -4.885208 2.934616 9.564869e+05 3.775389e+06 -978.001495 492.264118
124635 93609 5331268 354 6979 collection 1944.488099 5388.511901 -36.068475 21.666861 1.096568e+06 4.328309e+06 -957.779201 482.085493
75786 1652840 15207630 5267 37873 avoir 11439.413147 31700.586853 -57.710271 34.667405 3.408124e+06 1.345235e+07 -950.801064 478.573140
276731 0 4456691 0 70 likeetpartage 18.561866 51.438134 -4.308348 2.588088 9.008619e+05 3.555829e+06 -949.137435 477.735775
461195 3212547 23340821 9320 42865 vie 13837.871467 38347.128533 -38.406013 23.071055 5.367416e+06 2.118595e+07 -930.118591 468.162891
118203 1009296 11210836 3449 17652 chose 5595.342068 15505.657932 -28.693638 17.236689 2.470140e+06 9.749992e+06 -929.486465 467.844719
343768 866330 10256637 3083 22249 partager 6717.274313 18614.725687 -44.342601 26.637250 2.248362e+06 8.874605e+06 -921.690449 463.920697
334523 5822 4162843 66 6442 op 1725.723244 4782.276756 -39.953065 24.000391 8.426412e+05 3.326024e+06 -911.612529 458.848110
403327 117352 5037946 997 3271 sec 1131.743517 3136.256483 -4.005289 2.406036 1.042076e+06 4.113222e+06 -905.862897 455.954108
343875 123193 4854726 839 6384 partenaire 1915.319452 5307.680548 -24.593514 14.773684 1.006221e+06 3.971698e+06 -880.294260 443.084473
259268 58739 4330704 242 1094 kids 354.266480 981.733520 -5.964652 3.583054 8.872686e+05 3.502174e+06 -879.590364 442.730176
61832 586371 8041785 1551 6874 anniversaire 2234.053217 6190.946783 -14.451329 8.681125 1.744069e+06 6.884087e+06 -876.624208 441.237201
347439 84880 4510160 276 4191 pensée 1184.512251 3282.487749 -26.397391 15.857300 9.288273e+05 3.666213e+06 -875.684871 440.764398
147261 525518 7544571 2582 14772 demain 4601.751872 12752.248128 -29.773948 17.885646 1.631263e+06 6.438826e+06 -865.751019 435.764325
333717 4701 3684615 46 1954 ons 530.339043 1469.660957 -21.031618 12.634001 7.457470e+05 2.943569e+06 -858.122761 431.924742
127843 234299 5490803 1398 11826 concours 3506.601749 9717.398251 -35.608308 21.390432 1.157255e+06 4.567847e+06 -857.958631 431.842130
237026 419335 6675426 1569 19993 in 5717.585217 15844.414783 -54.864786 32.958081 1.434113e+06 5.660648e+06 -847.382381 426.518714
201597 327951 6041988 1314 4784 froid 1617.003741 4480.996259 -7.535160 4.526481 1.287600e+06 5.082339e+06 -845.710886 425.677389
322758 1315 3541844 27 3088 nl 826.003059 2288.996941 -27.800821 16.700360 7.162033e+05 2.826956e+06 -844.734115 425.185744
113184 0 3524700 0 41 cesttouss 10.871950 30.128050 -3.297264 1.980715 7.124721e+05 2.812228e+06 -844.080603 424.856808
154006 561896 7570124 1847 11635 dire 3575.015486 9906.984514 -28.900720 17.361086 1.643782e+06 6.488238e+06 -843.838408 424.734902
In [31]:
# Horizontal bar chart of graph_interactions_nonmedia1: one bar per lemma,
# bar length given by the interaction-based Pearson residual.
fig = (
    px.bar(
        graph_interactions_nonmedia1,
        x="res_inter_nonmedia",
        y="mot",
        orientation="h",
        title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
        labels={
            "res_inter_nonmedia": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
            "mot": "Lemme (dans les posts Facebook <b>non-médias</b>)",
        },
    )
    # Reversed y axis: the first row of the frame is drawn at the top.
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="lightyellow", opacity=0.75, textfont_size=12)
)
fig

bigrammes

In [32]:
# Load the two header-less bigram CSVs (media first, then non-media) with the
# same column names and parser options.
media2, nonmedia2 = (
    pan.read_csv(chemin, names=["bigramme", "interactions"], low_memory=False)
    for chemin in (
        "belgique-bigrammes-media-nettoye.csv",
        "belgique-bigrammes-nonmedia-nettoye.csv",
    )
)
In [33]:
# Show the media bigram frame as loaded; pandas elides the middle rows of a large frame.
media2
Out[33]:
bigramme interactions
0 vie bienvenue 175299
1 bienvenue émilie 175299
2 émilie dylan 175299
3 dylan faire 175299
4 faire tour 175299
... ... ...
8111423 gardien timon 7
8111424 timon wellenreuther 7
8111425 wellenreuther club 7
8111426 club bruxellois 7
8111427 bruxellois renforcer 7

8111428 rows × 2 columns

In [34]:
# Show the non-media bigram frame as loaded; pandas elides the middle rows of a large frame.
nonmedia2
Out[34]:
bigramme interactions
0 moment tendresse 224233
1 tendresse bébé 224233
2 bébé bébé 224233
3 bébé apprenon 224233
4 apprenon changer 224233
... ... ...
22718060 voter ici 5
22718061 ici visitgaume 5
22718062 visitgaume soleildegaume 5
22718063 soleildegaume voter 5
22718064 voter logo 5

22718065 rows × 2 columns

In [35]:
# Aggregate each corpus per bigram: `len` counts the rows of a bigram and
# "sum" totals its interactions. The result keeps the two-level columns
# ("len", "interactions") and ("sum", "interactions") that later cells rename
# by position.
# - "sum" is passed by name: recent pandas versions warn when the np.sum
#   callable is given as an aggregation function; the column label is the same.
# - Only "interactions" is listed in `values`; "bigramme" is the grouping key,
#   so listing it there added nothing to the result.
media2_table = pan.pivot_table(
    media2,
    index=["bigramme"],
    values=["interactions"],
    aggfunc=[len, "sum"],
)
nonmedia2_table = pan.pivot_table(
    nonmedia2,
    index=["bigramme"],
    values=["interactions"],
    aggfunc=[len, "sum"],
)
In [36]:
# Show the per-bigram aggregate for the media corpus (len and sum of interactions).
media2_table
Out[36]:
len sum
interactions interactions
bigramme
#balades travail 1 45
#breath4life masque 1 9
#cestbienlecas lidar 1 422
#confinemer coronavirus 1 49
#mobilité écologie 1 26
... ... ...
그ー ln24be 1 34
그切 ln24be 1 36
나4 oo 1 5
무를 oand 1 414
제이행 chadwick 1 2645

2915288 rows × 2 columns

In [37]:
# Show the per-bigram aggregate for the non-media corpus (len and sum of interactions).
nonmedia2_table
Out[37]:
len sum
interactions interactions
bigramme
#1 cafes 1 79
#2 continue 1 6
#2 spectacle 2 18
#2020toussolidaire vivaforlife 1 826
#21juillet belgique 1 693
... ... ...
single streaming 1 12
solidaire en 1 12
watch concert 1 13
𐀁мвя иσιя 1 12
𐌾ᗩïᗩᙅ lumière 1 11

7862831 rows × 2 columns

In [38]:
# Tag every row of each aggregate with the corpus it comes from, so the two
# tables can still be told apart once they are stacked.
for _table, _etiquette in ((media2_table, "media"), (nonmedia2_table, "non-media")):
    _table["type"] = _etiquette
In [39]:
# Show the media aggregate again, now with its constant "type" column.
media2_table
Out[39]:
len sum type
interactions interactions
bigramme
#balades travail 1 45 media
#breath4life masque 1 9 media
#cestbienlecas lidar 1 422 media
#confinemer coronavirus 1 49 media
#mobilité écologie 1 26 media
... ... ... ...
그ー ln24be 1 34 media
그切 ln24be 1 36 media
나4 oo 1 5 media
무를 oand 1 414 media
제이행 chadwick 1 2645 media

2915288 rows × 3 columns

In [40]:
# Show the non-media aggregate again, now with its constant "type" column.
nonmedia2_table
Out[40]:
len sum type
interactions interactions
bigramme
#1 cafes 1 79 non-media
#2 continue 1 6 non-media
#2 spectacle 2 18 non-media
#2020toussolidaire vivaforlife 1 826 non-media
#21juillet belgique 1 693 non-media
... ... ... ...
single streaming 1 12 non-media
solidaire en 1 12 non-media
watch concert 1 13 non-media
𐀁мвя иσιя 1 12 non-media
𐌾ᗩïᗩᙅ lumière 1 11 non-media

7862831 rows × 3 columns

In [41]:
# Stack the media rows on top of the non-media rows; the shared "bigramme"
# index and the column labels are carried over unchanged.
# The former `names=[...]` / `levels=0` arguments are dropped: in pandas.concat
# they only describe the extra index levels built from `keys=`, which is not
# used here, so they had no effect on the result (and `levels=0` is not a
# valid list of level values).
tableau2 = pan.concat([media2_table, nonmedia2_table])
In [42]:
# Show the stacked table: media rows first, then non-media rows.
tableau2
Out[42]:
len sum type
interactions interactions
bigramme
#balades travail 1 45 media
#breath4life masque 1 9 media
#cestbienlecas lidar 1 422 media
#confinemer coronavirus 1 49 media
#mobilité écologie 1 26 media
... ... ... ...
single streaming 1 12 non-media
solidaire en 1 12 non-media
watch concert 1 13 non-media
𐀁мвя иσιя 1 12 non-media
𐌾ᗩïᗩᙅ lumière 1 11 non-media

10778119 rows × 3 columns

In [43]:
# Copy the bigram labels out of the index into a regular column and replace
# the index with a plain 0..n-1 range, then give the four columns flat names
# (assigned by position: count, interaction sum, corpus tag, bigram).
tableau2 = tableau2.assign(bigramme=tableau2.index).reset_index(drop=True)
tableau2.columns = ["nb", "interactions", "media", "bigramme"]
tableau2
Out[43]:
nb interactions media bigramme
0 1 45 media #balades travail
1 1 9 media #breath4life masque
2 1 422 media #cestbienlecas lidar
3 1 49 media #confinemer coronavirus
4 1 26 media #mobilité écologie
... ... ... ... ...
10778114 1 12 non-media single streaming
10778115 1 12 non-media solidaire en
10778116 1 13 non-media watch concert
10778117 1 12 non-media 𐀁мвя иσιя
10778118 1 11 non-media 𐌾ᗩïᗩᙅ lumière

10778119 rows × 4 columns

In [44]:
# Cross-tabulate bigram x corpus ("media" column). For each of `len` and "sum"
# the result holds the "interactions" and "nb" values split into media and
# non-media columns (8 columns in total); missing combinations become 0.
# - `values` now names "interactions" and "nb" explicitly. The earlier
#   ["bigramme", "interactions"] only produced the "nb" columns because
#   pivot_table happened not to drop unlisted columns for this input.
# - "sum" is passed by name: recent pandas versions warn when the np.sum
#   callable is given as an aggregation function; the column label is the same.
khi2_2 = pan.pivot_table(
    tableau2,
    index=["bigramme"],
    columns=["media"],
    values=["interactions", "nb"],
    aggfunc=[len, "sum"],
    fill_value=0,
)
In [45]:
# Show the bigram x corpus pivot (0 where a bigram has no rows for a corpus).
khi2_2
Out[45]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
bigramme
#1 cafes 0 1 0 1 0 79 0 1
#2 continue 0 1 0 1 0 6 0 1
#2 spectacle 0 1 0 1 0 18 0 2
#2020toussolidaire vivaforlife 0 1 0 1 0 826 0 1
#21juillet belgique 0 1 0 1 0 693 0 1
... ... ... ... ... ... ... ... ...
single streaming 0 1 0 1 0 12 0 1
solidaire en 0 1 0 1 0 12 0 1
watch concert 0 1 0 1 0 13 0 1
𐀁мвя иσιя 0 1 0 1 0 12 0 1
𐌾ᗩïᗩᙅ lumière 0 1 0 1 0 11 0 1

9562525 rows × 8 columns

In [46]:
# Keep only the "sum" half of the pivot and give it flat column names.
# Columns are picked by label instead of being renamed by position with
# throwaway "a".."d" names, so a change in the pivot's column order or count
# raises a KeyError rather than silently mislabelling data. This also removes
# the intermediate get_level_values(0) assignment, which was overwritten
# before it was ever used.
_sommes = khi2_2["sum"]  # two-level columns: (value name, corpus)
khi2_2 = pan.DataFrame(
    {
        "interactions_media": _sommes[("interactions", "media")],
        "interactions_nonmedia": _sommes[("interactions", "non-media")],
        "obs_media": _sommes[("nb", "media")],
        "obs_nonmedia": _sommes[("nb", "non-media")],
    }
)
# Move the bigram labels from the index into a last column, then switch to a
# plain 0..n-1 index.
khi2_2["bigramme"] = khi2_2.index
khi2_2 = khi2_2.reset_index(drop=True)
khi2_2
Out[46]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme
0 0 79 0 1 #1 cafes
1 0 6 0 1 #2 continue
2 0 18 0 2 #2 spectacle
3 0 826 0 1 #2020toussolidaire vivaforlife
4 0 693 0 1 #21juillet belgique
... ... ... ... ... ...
9562520 0 12 0 1 single streaming
9562521 0 12 0 1 solidaire en
9562522 0 13 0 1 watch concert
9562523 0 12 0 1 𐀁мвя иσιя
9562524 0 11 0 1 𐌾ᗩïᗩᙅ lumière

9562525 rows × 5 columns

In [47]:
# Column totals of the observed counts, as a (media, non-media) tuple.
tuple(khi2_2[colonne].sum() for colonne in ("obs_media", "obs_nonmedia"))
Out[47]:
(8111428, 22718065)
In [48]:
# Sanity check: each observed total in khi2_2 is printed next to the row
# count of the CSV-derived frame it should match.
for _libelle, _valeur in (
    ("Nb de lignes fichier media2 = ", len(media2)),
    ("Somme observée média khi2_2 = ", khi2_2["obs_media"].sum()),
    ("Nb de lignes  fichier nonmedia2 = ", len(nonmedia2)),
    ("Somme observée nonmédia khi2_2 = ", khi2_2["obs_nonmedia"].sum()),
):
    print(_libelle, _valeur)
Nb de lignes fichier media2 =  8111428
Somme observée média khi2_2 =  8111428
Nb de lignes  fichier nonmedia2 =  22718065
Somme observée nonmédia khi2_2 =  22718065
In [49]:
# Expected counts under independence: row total * column total / grand total.
_obs_par_bigramme = khi2_2["obs_media"] + khi2_2["obs_nonmedia"]  # row totals
_n_media = khi2_2["obs_media"].sum()  # column total, media
_n_nonmedia = khi2_2["obs_nonmedia"].sum()  # column total, non-media
khi2_2["exp_media"] = (_obs_par_bigramme * _n_media) / (_n_media + _n_nonmedia)
khi2_2["exp_nonmedia"] = (_obs_par_bigramme * _n_nonmedia) / (_n_media + _n_nonmedia)
khi2_2
Out[49]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia
0 0 79 0 1 #1 cafes 0.263106 0.736894
1 0 6 0 1 #2 continue 0.263106 0.736894
2 0 18 0 2 #2 spectacle 0.526212 1.473788
3 0 826 0 1 #2020toussolidaire vivaforlife 0.263106 0.736894
4 0 693 0 1 #21juillet belgique 0.263106 0.736894
... ... ... ... ... ... ... ...
9562520 0 12 0 1 single streaming 0.263106 0.736894
9562521 0 12 0 1 solidaire en 0.263106 0.736894
9562522 0 13 0 1 watch concert 0.263106 0.736894
9562523 0 12 0 1 𐀁мвя иσιя 0.263106 0.736894
9562524 0 11 0 1 𐌾ᗩïᗩᙅ lumière 0.263106 0.736894

9562525 rows × 7 columns

In [50]:
# Pearson residual per corpus: (observed - expected) / sqrt(expected).
for _groupe in ("media", "nonmedia"):
    _observe = khi2_2["obs_" + _groupe]
    _attendu = khi2_2["exp_" + _groupe]
    khi2_2["res_" + _groupe] = (_observe - _attendu) / np.sqrt(_attendu)
khi2_2
Out[50]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 79 0 1 #1 cafes 0.263106 0.736894 -0.512939 0.306499
1 0 6 0 1 #2 continue 0.263106 0.736894 -0.512939 0.306499
2 0 18 0 2 #2 spectacle 0.526212 1.473788 -0.725405 0.433454
3 0 826 0 1 #2020toussolidaire vivaforlife 0.263106 0.736894 -0.512939 0.306499
4 0 693 0 1 #21juillet belgique 0.263106 0.736894 -0.512939 0.306499
... ... ... ... ... ... ... ... ... ...
9562520 0 12 0 1 single streaming 0.263106 0.736894 -0.512939 0.306499
9562521 0 12 0 1 solidaire en 0.263106 0.736894 -0.512939 0.306499
9562522 0 13 0 1 watch concert 0.263106 0.736894 -0.512939 0.306499
9562523 0 12 0 1 𐀁мвя иσιя 0.263106 0.736894 -0.512939 0.306499
9562524 0 11 0 1 𐌾ᗩïᗩᙅ lumière 0.263106 0.736894 -0.512939 0.306499

9562525 rows × 9 columns

In [51]:
# The 50 bigrams with the largest media residual, highest first.
graph_media2 = khi2_2.sort_values("res_media", ascending=False).iloc[:50]
graph_media2
Out[51]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
2324145 3052404 375839 7287 998 coronavirus belgique 2179.834128 6105.165872 109.387637 -65.362887
2664379 2977596 151767 5464 1019 diable rouge 1705.716916 4777.283084 90.998872 -54.374966
2801236 1343784 550471 6086 2338 donald trump 2216.405877 6207.594123 82.194123 -49.113825
5986064 828623 89838 3868 1205 nouveau cas 1334.737300 3738.262700 69.339759 -41.432899
5024298 194241 3907 2213 49 libre afrique 595.146023 1666.853977 66.317409 -39.626941
6837077 480082 173768 3985 1790 pro league 1519.437790 4255.562210 63.252035 -37.795274
3096480 2401438 236542 2583 485 eden hazard 807.209548 2260.790452 62.502670 -37.347502
1528012 41529 2849 1930 34 business am 516.740402 1447.259598 62.170697 -37.149137
9431033 999988 386521 3371 1585 épidémie coronavirus 1303.953885 3652.046115 57.242560 -34.204405
234789 617764 341839 2669 931 24 heure 947.181999 2652.818001 55.946191 -33.429780
7226808 2255940 294881 2442 735 real madrid 835.888114 2341.111886 55.552259 -33.194391
6269574 606184 309494 3205 1578 pandémie coronavirus 1258.436528 3524.563472 54.872268 -32.788074
5056072 905840 607665 2876 1268 ligue champion 1090.311723 3053.688277 54.079206 -32.314192
8606113 328448 58140 1847 333 tour france 573.571321 1606.428679 53.171745 -31.771952
1663963 453261 89470 1997 460 cas coronavirus 646.451714 1810.548286 53.118044 -31.739864
4755359 862091 193155 2644 1177 joe biden 1005.328449 2815.671551 51.681835 -30.881680
8502943 561911 262722 2568 1150 tester positif 978.228520 2739.771480 50.829348 -30.372290
8901993 190149 5485 1483 154 vedia television 430.704703 1206.295297 50.704654 -30.297780
8445370 190149 5485 1483 154 television local 430.704703 1206.295297 50.704654 -30.297780
5114411 190149 5485 1483 154 local region 430.704703 1206.295297 50.704654 -30.297780
7288603 190149 5485 1483 154 region verviers 430.704703 1206.295297 50.704654 -30.297780
8095073 959023 456198 2766 1384 sophie wilmès 1091.890360 3058.109640 50.663406 -30.273133
480138 75535 14270 1449 143 actualité région 418.864928 1173.135072 50.333505 -30.076006
7317733 464121 17642 1223 52 remco evenepoel 335.460291 939.539709 48.458195 -28.955444
4873951 1251040 292038 1454 237 kevin bruyne 444.912433 1246.087567 47.840045 -28.586078
1956022 330349 44153 1570 347 club bruges 504.374414 1412.625586 47.449115 -28.352483
8047399 1239732 507756 2279 1053 soin intensif 876.669561 2455.330439 47.362258 -28.300583
6716964 430545 162194 1850 608 positif coronavirus 646.714820 1811.285180 47.316451 -28.273212
9372640 72109 13859 1286 134 également bx1 373.610677 1046.389323 47.203104 -28.205483
1538157 72074 13859 1284 134 bx1be également 373.084465 1044.915535 47.160079 -28.179775
1538109 72074 13859 1284 134 bx1 télévision 373.084465 1044.915535 47.160079 -28.179775
8769725 72074 13859 1284 134 télévision bx1 373.084465 1044.915535 47.160079 -28.179775
1628669 72074 13859 1284 134 capitale bx1be 373.084465 1044.915535 47.160079 -28.179775
1538092 70936 14191 1274 138 bx1 suivre 371.505828 1040.494172 46.823254 -27.978510
1538018 72074 14401 1284 153 bx1 radio 378.083481 1058.916519 46.590173 -27.839236
7012119 412411 110784 1634 460 président américain 550.944196 1543.055804 46.142065 -27.571476
8388268 1127253 1853299 5021 5103 t il 2663.686265 7460.313735 45.674716 -27.292219
8281317 82914 49166 1365 248 suivre actualité 424.390157 1188.609843 45.659050 -27.282858
7548601 325159 272895 2504 1437 royaume uni 1036.901183 2904.098817 45.560731 -27.224109
5952698 208406 6033 1060 40 nombre moyen 289.416722 810.583278 45.295783 -27.065793
7553890 385493 43255 1639 526 rtc télé 569.624730 1595.375270 44.805953 -26.773103
3360543 560395 174700 1743 640 europa league 626.981862 1756.018138 44.570116 -26.632182
1663998 277158 103426 1596 497 cas covid-19 550.681090 1542.318910 44.544976 -26.617160
5952762 235192 27081 1177 146 nombre nouveau 348.089385 974.910615 44.428568 -26.547602
2260075 266380 30461 1203 167 contamination coronavirus 360.455372 1009.544628 44.377942 -26.517352
8882029 460927 15646 1070 76 van aert 301.519603 844.480397 44.256291 -26.444661
4551976 309311 15575 1035 56 institut santé 287.048767 803.951233 44.146415 -26.379006
5988110 239701 23941 1168 158 nouveau hospitalisation 348.878703 977.121297 43.854179 -26.204385
7515187 1168042 164697 1342 308 romelu lukaku 434.125083 1215.874917 43.573112 -26.036438
5988286 220641 13853 1036 91 nouveau infection 296.520587 830.479413 42.943621 -25.660295
In [52]:
# Horizontal bar chart of graph_media2: one bar per bigram, bar length given
# by the media Pearson residual.
fig = (
    px.bar(
        graph_media2,
        x="res_media",
        y="bigramme",
        orientation="h",
        title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays),
        labels={
            "res_media": "Résiduel de Pearson",
            "bigramme": "Bigramme (dans les posts Facebook <b>médias</b>)",
        },
    )
    # Reversed y axis: the first row of the frame is drawn at the top.
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="navy", opacity=0.75, textfont_size=12)
)
fig
In [53]:
# The 50 bigrams with the largest non-media residual, highest first.
graph_nonmedia2 = khi2_2.sort_values("res_nonmedia", ascending=False).iloc[:50]
graph_nonmedia2
Out[53]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
6527313 406762 16320066 1094 48045 photo from 12928.771177 36210.228823 -104.083345 62.193390
6786318 782096 5789921 408 11057 prendre soin 3016.511560 8448.488440 -47.494166 28.379403
1649067 4976 495682 29 8099 caroline taquin 2138.526468 5989.473532 -45.617099 27.257791
8547754 105277 2734947 740 11316 timeline photo 3172.007271 8883.992729 -43.181506 25.802440
8414520 1064 275702 3 4518 taquin bourgmestre 1189.502727 3331.497273 -34.402184 20.556492
5167404 0 80166 0 3878 luc michel 1020.325498 2857.674502 -31.942534 19.086766
1425756 808 220043 6 3444 bourgmestre caroline 907.716082 2542.283918 -29.929179 17.883717
6533982 39011 785119 98 3619 photos from 977.965414 2739.034586 -28.138689 16.813837
2737145 38599 2282315 76 3416 disneyland paris 918.766539 2573.233461 -27.803835 16.613751
8524534 38211 786173 1001 7841 this is 2326.384231 6515.615769 -27.479010 16.419656
260284 269 638020 3 2769 3 3 729.330139 2042.669861 -26.895027 16.070707
6086862 36162 742372 985 7536 of post 2241.927170 6279.072830 -26.546017 15.862161
4653296 36162 742336 985 7535 is re-share 2241.664064 6278.335936 -26.542017 15.859771
7225666 36162 742336 985 7535 re-share of 2241.664064 6278.335936 -26.542017 15.859771
6071899 36335 703669 272 3929 octobre 2020 1105.308771 3095.691229 -25.064794 14.977079
7344888 64456 1217930 494 4963 rendez vous 1435.770046 4021.229954 -24.854351 14.851332
1713380 22357 419968 452 4648 centre culturel 1341.841165 3758.158835 -24.291925 14.515264
1950094 22479 1271855 54 2566 cliquer lien 689.338010 1930.661990 -24.198516 14.459448
6783146 55399 458794 162 3162 prendre contact 874.564712 2449.435288 -24.095078 14.397641
501905 30687 254990 253 3638 administration communal 1023.745877 2867.254123 -24.088798 14.393888
5379768 140930 805252 342 3896 mars 2020 1115.043697 3122.956303 -23.150379 13.833150
7911006 36650 430894 258 3461 septembre 2020 978.491626 2740.508374 -23.032993 13.763008
8414526 895 115867 1 1955 taquin caroline 514.635553 1441.364447 -22.641499 13.529077
5666925 1511 112827 3 1919 moniteur automobile 505.689945 1416.310055 -22.354144 13.357372
2619536 4116 234044 19 2020 der valk 536.473360 1502.526640 -22.341582 13.349866
2901048 19675 529568 228 3180 décembre 2020 896.665625 2511.334375 -22.330258 13.343100
3816823 0 138957 0 1894 from commune 498.322974 1395.677026 -22.323149 13.338852
4423343 721 3529914 10 1918 in het 507.268582 1420.731418 -22.078626 13.192741
5996771 31702 762862 224 3111 novembre 2020 877.458879 2457.541121 -22.059968 13.181592
2744853 7310 1106746 69 2272 disponible ici 615.931405 1725.068595 -22.037721 13.168299
6249892 122256 1559209 500 4396 page facebook 1288.167518 3607.832482 -21.960001 13.121859
4530134 1509 266275 29 1991 inscrire vous 531.474344 1488.525656 -21.795796 13.023740
6249924 2344 242141 11 1765 page fb 467.276453 1308.723547 -21.107709 12.612585
4318632 3564 175846 34 1908 hésiter contacter 510.952067 1431.047933 -21.100107 12.608042
4490416 209 119769 9 1661 info réservation 439.387205 1230.612795 -20.532208 12.268703
6087445 10203 506702 109 2197 of the 606.722691 1699.277309 -20.206557 12.074115
3817947 0 201220 0 1506 from ville 396.237803 1109.762197 -19.905723 11.894356
1380921 15633 3128883 90 2030 bon journée 557.784955 1562.215045 -19.806733 11.835207
3862260 34398 249311 174 2428 février 2020 684.602100 1917.397900 -19.514772 11.660750
8883829 117 227365 3 1467 van het 386.765983 1083.234017 -19.513822 11.660182
7821711 25111 615622 132 2201 saviez vous 613.826556 1719.173444 -19.447684 11.620662
5504889 4826 317601 75 1872 message priver 512.267598 1434.732402 -19.319632 11.544147
1081114 1111 128530 51 1730 auvio voir 468.591983 1312.408017 -19.290999 11.527038
5747048 24 167366 1 1408 mp page 370.716510 1038.283490 -19.202063 11.473895
3496426 30675 578356 179 2400 faire plaisir 678.550660 1900.449340 -19.177341 11.459123
5075949 1297 76501 7 1438 lire suite 380.188330 1064.811670 -19.139415 11.436461
6098902 7170 147127 77 1855 offre emploi 508.321006 1423.678994 -19.130732 11.431272
546081 0 66070 0 1386 afrique media 364.665070 1021.334930 -19.096206 11.410642
4799807 38279 553760 287 2887 journée international 835.098796 2338.901204 -18.966619 11.333209
3282667 504 159211 7 1398 envoyer mp 369.664086 1035.335914 -18.862572 11.271038
In [54]:
# Horizontal bar chart of graph_nonmedia2: one bar per bigram, bar length
# given by the non-media Pearson residual.
fig = (
    px.bar(
        graph_nonmedia2,
        x="res_nonmedia",
        y="bigramme",
        orientation="h",
        title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays),
        labels={
            "res_nonmedia": "Résiduel de Pearson",
            "bigramme": "Bigramme (dans les posts Facebook <b>non-médias</b>)",
        },
    )
    # Reversed y axis: the first row of the frame is drawn at the top.
    .update_layout(yaxis={"autorange": "reversed"}, width=1000, height=1000)
    .update_traces(marker_color="aqua", opacity=0.75, textfont_size=12)
)
fig
In [55]:
# Expected interaction totals under independence, same formula as for the
# counts: row total * column total / grand total.
_inter_par_bigramme = khi2_2["interactions_media"] + khi2_2["interactions_nonmedia"]
_inter_media = khi2_2["interactions_media"].sum()
_inter_nonmedia = khi2_2["interactions_nonmedia"].sum()
khi2_2["exp_inter_media"] = (_inter_par_bigramme * _inter_media) / (_inter_media + _inter_nonmedia)
khi2_2["exp_inter_nonmedia"] = (_inter_par_bigramme * _inter_nonmedia) / (_inter_media + _inter_nonmedia)
khi2_2
Out[55]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 79 0 1 #1 cafes 0.263106 0.736894 -0.512939 0.306499 15.904769 63.095231
1 0 6 0 1 #2 continue 0.263106 0.736894 -0.512939 0.306499 1.207957 4.792043
2 0 18 0 2 #2 spectacle 0.526212 1.473788 -0.725405 0.433454 3.623872 14.376128
3 0 826 0 1 #2020toussolidaire vivaforlife 0.263106 0.736894 -0.512939 0.306499 166.295438 659.704562
4 0 693 0 1 #21juillet belgique 0.263106 0.736894 -0.512939 0.306499 139.519054 553.480946
... ... ... ... ... ... ... ... ... ... ... ...
9562520 0 12 0 1 single streaming 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086
9562521 0 12 0 1 solidaire en 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086
9562522 0 13 0 1 watch concert 0.263106 0.736894 -0.512939 0.306499 2.617241 10.382759
9562523 0 12 0 1 𐀁мвя иσιя 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086
9562524 0 11 0 1 𐌾ᗩïᗩᙅ lumière 0.263106 0.736894 -0.512939 0.306499 2.214588 8.785412

9562525 rows × 11 columns

In [56]:
# Pearson residual on interactions: (observed - expected) / sqrt(expected).
khi2_2["res_inter_media"] = (
    khi2_2["interactions_media"]
    .sub(khi2_2["exp_inter_media"])
    .div(np.sqrt(khi2_2["exp_inter_media"]))
)
khi2_2["res_inter_nonmedia"] = (
    khi2_2["interactions_nonmedia"]
    .sub(khi2_2["exp_inter_nonmedia"])
    .div(np.sqrt(khi2_2["exp_inter_nonmedia"]))
)
khi2_2
Out[56]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 79 0 1 #1 cafes 0.263106 0.736894 -0.512939 0.306499 15.904769 63.095231 -3.988078 2.002300
1 0 6 0 1 #2 continue 0.263106 0.736894 -0.512939 0.306499 1.207957 4.792043 -1.099071 0.551812
2 0 18 0 2 #2 spectacle 0.526212 1.473788 -0.725405 0.433454 3.623872 14.376128 -1.903647 0.955767
3 0 826 0 1 #2020toussolidaire vivaforlife 0.263106 0.736894 -0.512939 0.306499 166.295438 659.704562 -12.895559 6.474490
4 0 693 0 1 #21juillet belgique 0.263106 0.736894 -0.512939 0.306499 139.519054 553.480946 -11.811818 5.930375
... ... ... ... ... ... ... ... ... ... ... ... ... ...
9562520 0 12 0 1 single streaming 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086 -1.554321 0.780380
9562521 0 12 0 1 solidaire en 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086 -1.554321 0.780380
9562522 0 13 0 1 watch concert 0.263106 0.736894 -0.512939 0.306499 2.617241 10.382759 -1.617789 0.812245
9562523 0 12 0 1 𐀁мвя иσιя 0.263106 0.736894 -0.512939 0.306499 2.415914 9.584086 -1.554321 0.780380
9562524 0 11 0 1 𐌾ᗩïᗩᙅ lumière 0.263106 0.736894 -0.512939 0.306499 2.214588 8.785412 -1.488149 0.747157

9562525 rows × 13 columns

In [57]:
# The 50 bigrams with the largest interaction-based media residual, highest first.
graph_interactions_media2 = khi2_2.sort_values("res_inter_media", ascending=False).iloc[:50]
graph_interactions_media2
Out[57]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
2664379 2977596 151767 5464 1019 diable rouge 1705.716916 4777.283084 90.998872 -54.374966 630022.749136 2.499340e+06 2957.610890 -1484.931639
2324145 3052404 375839 7287 998 coronavirus belgique 2179.834128 6105.165872 109.387637 -65.362887 690195.122639 2.738048e+06 2843.363305 -1427.571202
3096480 2401438 236542 2583 485 eden hazard 807.209548 2260.790452 62.502670 -37.347502 531094.478898 2.106886e+06 2566.465829 -1288.548917
7226808 2255940 294881 2442 735 real madrid 835.888114 2341.111886 55.552259 -33.194391 513547.088968 2.037274e+06 2431.397465 -1220.734964
7515187 1168042 164697 1342 308 romelu lukaku 434.125083 1215.874917 43.573112 -26.036438 268315.273320 1.064424e+06 1736.952428 -872.074019
4873951 1251040 292038 1454 237 kevin bruyne 444.912433 1246.087567 47.840045 -28.586078 310662.024090 1.232416e+06 1687.168139 -847.078755
772852 1691969 944110 376 473 annie cordy 223.377088 625.622912 10.211756 -6.101876 530711.757799 2.105367e+06 1594.039507 -800.321538
2801236 1343784 550471 6086 2338 donald trump 2216.405877 6207.594123 82.194123 -49.113825 381363.153672 1.512892e+06 1558.459986 -782.458081
8521658 795022 53368 986 151 thibaut courtois 299.151648 837.848352 39.711397 -23.728930 170803.131545 6.775869e+05 1510.389569 -758.323303
3618277 715544 1710 7 4 fils 15 2.894167 8.105833 2.413456 -1.442123 144402.019487 5.728520e+05 1502.994702 -754.610552
8047399 1239732 507756 2279 1053 soin intensif 876.669561 2455.330439 47.362258 -28.300583 351815.111843 1.395673e+06 1496.976805 -751.589138
5986064 828623 89838 3868 1205 nouveau cas 1334.737300 3738.262700 69.339759 -41.432899 184910.259434 7.335507e+05 1496.964843 -751.583133
5305313 893332 168442 1556 558 manchester city 556.206318 1557.793682 42.392832 -25.331180 213762.920582 8.480111e+05 1469.831068 -737.960042
4755359 862091 193155 2644 1177 joe biden 1005.328449 2815.671551 51.681835 -30.881680 212448.663174 8.427973e+05 1409.442356 -707.640601
3008445 652671 27257 36 40 dénoncer voisin 19.996064 56.003936 3.578941 -2.138541 136887.317890 5.430407e+05 1394.074525 -699.924854
5793883 715957 85819 28 51 mère fils 20.785383 58.214617 1.582466 -0.945578 161418.512238 6.403575e+05 1380.241317 -692.979596
9431033 999988 386521 3371 1585 épidémie coronavirus 1303.953885 3652.046115 57.242560 -34.204405 279140.582886 1.107368e+06 1364.369041 -685.010581
1686286 747304 161595 1256 671 cause coronavirus 507.005475 1419.994525 33.263835 -19.876289 182985.178347 7.259138e+05 1319.218031 -662.341554
5072005 701061 119255 796 159 lionel messi 251.266336 703.733664 34.365056 -20.534307 165151.099914 6.551649e+05 1318.715299 -662.089147
7497761 577739 45011 1207 317 roberto martinez 400.973713 1123.026287 40.252351 -24.052169 125375.888647 4.973741e+05 1277.556660 -641.424574
8095073 959023 456198 2766 1384 sophie wilmès 1091.890360 3058.109640 50.663406 -30.273133 284921.060630 1.130300e+06 1262.883533 -634.057617
3549067 637483 111473 955 309 fc barcelone 332.566124 931.433876 34.131409 -20.394695 150784.462557 5.981715e+05 1253.377069 -629.284695
2218210 1037905 583996 3962 4068 conseil national 2112.742069 5917.257931 40.232243 -24.040154 326531.158848 1.295370e+06 1244.903180 -625.030198
2033809 745120 244611 2429 1569 comité concertation 1051.898231 2946.101769 42.459920 -25.371267 199258.777433 7.904722e+05 1222.850915 -613.958388
5867131 1009985 576140 3854 3958 national sécurité 2055.384937 5756.615063 39.672689 -23.705801 319328.512855 1.266796e+06 1222.202721 -613.632948
90070 935660 527934 714 1208 15 an 505.689945 1416.310055 9.263350 -5.535171 294659.812716 1.168934e+06 1180.858071 -592.874984
7317733 464121 17642 1223 52 remco evenepoel 335.460291 939.539709 48.458195 -28.955444 96991.512232 3.847715e+05 1178.833321 -591.858415
8882029 460927 15646 1070 76 van aert 301.519603 844.480397 44.256291 -26.444661 95946.629274 3.806264e+05 1178.296661 -591.588974
4311538 433990 2087 6 4 héberger ami 2.631061 7.368939 2.076958 -1.241054 87793.723635 3.482833e+05 1168.397604 -586.618941
4567993 515059 82980 702 230 inter milan 245.214895 686.785105 29.170140 -17.430165 120400.917014 4.776381e+05 1137.381355 -571.046572
4564018 408314 3920 27 7 intensif cause 8.945608 25.054392 6.036399 -3.606957 82993.503140 3.292405e+05 1129.248126 -566.963111
5989045 953952 638232 3479 3124 nouveau mesure 1737.289649 4865.710351 41.786868 -24.969095 320548.348278 1.271636e+06 1118.751914 -561.693263
5184949 550208 143010 1467 720 lundi soir 575.413064 1611.586936 37.168398 -22.209400 139562.943037 5.536551e+05 1099.212200 -551.882933
2852595 376861 148 30 5 droit citer 9.208714 25.791286 6.851440 -4.093972 75901.787881 3.011072e+05 1092.399866 -548.462656
7798373 819508 478945 3103 2711 santé public 1529.698928 4284.301072 40.226186 -24.036534 261412.603359 1.037040e+06 1091.553884 -548.037912
5056072 905840 607665 2876 1268 ligue champion 1090.311723 3053.688277 54.079206 -32.314192 304708.204491 1.208797e+06 1088.999363 -546.755360
9020867 440965 53711 1175 292 vincent kompany 385.976664 1081.023336 40.161444 -23.997849 99591.237403 3.950848e+05 1081.731745 -543.106498
726455 379175 8157 20 19 an soin 10.261138 28.738862 3.040258 -1.816659 77980.078204 3.093519e+05 1078.588566 -541.528398
3360543 560395 174700 1743 640 europa league 626.981862 1756.018138 44.570116 -26.632182 147993.880153 5.871011e+05 1072.007830 -538.224399
1493702 359272 73 4 2 bruxelles contracter 1.578637 4.421363 1.927165 -1.151548 72345.561953 2.869994e+05 1066.754835 -535.587021
836340 358602 356 5 4 appel déchirant 2.367955 6.632045 1.710435 -1.022044 72267.648715 2.866904e+05 1065.127238 -534.769851
5397630 357399 55 3 1 masque plaire 1.052424 2.947576 1.898449 -1.134389 71964.854116 2.854891e+05 1064.009979 -534.208907
8049029 356985 10 2 1 soin témoignage 0.789318 2.210682 1.362711 -0.814267 71872.445392 2.851226e+05 1063.494212 -533.949955
4322577 357098 92 6 3 hôpital reine 2.367955 6.632045 2.360285 -1.410352 71911.704000 2.852783e+05 1063.478863 -533.942249
3798531 356985 55 2 1 françoise choquet 0.789318 2.210682 1.362711 -0.814267 71881.505070 2.851585e+05 1063.393399 -533.899340
1852608 356985 55 2 1 choquet mère 0.789318 2.210682 1.362711 -0.814267 71881.505070 2.851585e+05 1063.393399 -533.899340
2910372 356985 55 2 1 déchirant françoise 0.789318 2.210682 1.362711 -0.814267 71881.505070 2.851585e+05 1063.393399 -533.899340
6593971 357321 803 3 2 plaire appel 1.315531 3.684469 1.468631 -0.877558 72099.742667 2.860243e+05 1062.221366 -533.310896
3463370 356985 587 2 8 fabiola bruxelles 2.631061 7.368939 -0.389050 0.232471 71988.610607 2.855834e+05 1062.202850 -533.301599
8923403 357624 2219 7 14 venir transférer 5.525228 15.474772 0.627407 -0.374898 72445.822398 2.873972e+05 1059.521115 -531.955177
In [58]:
# Horizontal bar chart of the rows held in graph_interactions_media2 (built in
# an earlier cell): one bar per bigram, bar length = interaction-based Pearson
# residual for media posts.
fig = (
    px.bar(
        graph_interactions_media2,
        x="res_inter_media",
        y="bigramme",
        orientation="h",
        labels={
            "res_inter_media": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
            "bigramme": "Bigramme (dans les posts Facebook <b>médias</b>)",
        },
        title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
    )
    # Reversed y axis: bars follow the frame's row order from top to bottom.
    .update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
    .update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
)
fig
In [59]:
# Keep the 50 bigrams with the highest interaction-based residual for
# non-media posts (descending sort, then the first 50 rows).
graph_interactions_nonmedia2 = khi2_2.sort_values("res_inter_nonmedia", ascending=False).iloc[:50]
graph_interactions_nonmedia2
Out[59]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
6527313 406762 16320066 1094 48045 photo from 12928.771177 36210.228823 -104.083345 62.193390 3.367549e+06 1.335928e+07 -1613.430175 810.057037
6256788 358840 7677739 700 2160 pairi daiza 752.483477 2107.516523 -1.913261 1.143239 1.617974e+06 6.418605e+06 -989.888222 496.994498
9083199 54407 4207833 182 597 voice kids 204.959660 574.040340 -1.603730 0.958284 8.581006e+05 3.404139e+06 -867.603704 435.598947
4423343 721 3529914 10 1918 in het 507.268582 1420.731418 -22.078626 13.192741 7.108093e+05 2.819826e+06 -842.239898 422.864507
5058006 0 3485133 0 39 likeetpartage cesttouss 10.261138 28.738862 -3.203301 1.914083 7.016486e+05 2.783484e+06 -837.644656 420.557368
6137739 115 3348925 5 880 ook in 232.848908 652.151092 -14.931720 8.922218 6.742495e+05 2.674791e+06 -820.986908 412.193991
9110551 115 3343761 5 838 volg ons 221.798451 621.201549 -14.557169 8.698411 6.732098e+05 2.670666e+06 -820.353494 411.875971
6134444 115 3343200 5 831 ons ook 219.956709 616.043291 -14.493804 8.660548 6.730969e+05 2.670218e+06 -820.284652 411.841408
8622968 0 3322292 0 36 touré collection 9.471820 26.528180 -3.077632 1.838991 6.688644e+05 2.653428e+06 -817.841312 410.614677
2540159 115 3321729 5 819 daiza nl 216.799435 607.200565 -14.384532 8.595254 6.687742e+05 2.653070e+06 -817.645545 410.516388
4198411 777 3323881 10 858 het nederlands 228.376104 639.623896 -14.450397 8.634611 6.693407e+05 2.655317e+06 -817.182752 410.284033
5886291 115 3290969 5 812 nederlands op 214.957692 602.042308 -14.320405 8.556936 6.625814e+05 2.628503e+06 -813.849767 408.610636
6139716 115 3228050 5 794 op pairi 210.221782 588.778218 -14.154176 8.457609 6.499142e+05 2.578251e+06 -806.029900 404.684506
8516822 182995 4632872 616 1461 the voice 546.471392 1530.528608 2.974266 -1.777227 9.695602e+05 3.846307e+06 -798.817063 401.063147
2006172 0 3121863 0 39 collection likeetpartage 10.261138 28.738862 -3.203301 1.914083 6.285128e+05 2.493350e+06 -792.787994 398.036124
6340536 0 3118599 0 34 partenaire touré 8.945608 25.054392 -2.990921 1.787178 6.278557e+05 2.490743e+06 -792.373443 397.827990
1380921 15633 3128883 90 2030 bon journée 557.784955 1562.215045 -19.806733 11.835207 6.330734e+05 2.511443e+06 -776.011265 389.613009
2540450 0 2870770 0 754 daiza' post 198.382008 555.617992 -14.084815 8.416163 5.779612e+05 2.292809e+06 -760.237597 381.693503
3817554 0 2870770 0 754 from pairi 198.382008 555.617992 -14.084815 8.416163 5.779612e+05 2.292809e+06 -760.237597 381.693503
6256789 0 2870770 0 754 pairi daiza' 198.382008 555.617992 -14.084815 8.416163 5.779612e+05 2.292809e+06 -760.237597 381.693503
2831025 0 2545312 0 31 dougourouni tv 8.156289 22.843711 -2.855922 1.706512 5.124380e+05 2.032874e+06 -715.847737 359.406627
5935146 0 2263822 0 617 nl photo 162.336470 454.663530 -12.741133 7.613267 4.557667e+05 1.808055e+06 -675.104934 338.950833
2831011 0 2065175 0 42 dougourouni city 11.050457 30.949543 -3.324223 1.986338 4.157738e+05 1.649401e+06 -644.805263 323.738237
4541686 0 2040068 0 29 instagram partenaire 7.630077 21.369923 -2.762259 1.650545 4.107191e+05 1.629349e+06 -640.873724 321.764324
2377882 5970 2048176 186 530 cours après-midi 188.383975 527.616025 -0.173692 0.103787 4.135534e+05 1.640593e+06 -633.797746 318.211678
2737145 38599 2282315 76 3416 disneyland paris 918.766539 2573.233461 -27.803835 16.613751 4.672608e+05 1.853653e+06 -627.097547 314.847701
551314 0 1925678 0 236 age tendre 62.093042 173.906958 -7.879914 4.708520 3.876894e+05 1.537989e+06 -622.647113 312.613265
1230400 27364 2136184 142 2020 bel journée 568.835411 1593.164589 -17.896460 10.693752 4.355789e+05 1.727969e+06 -618.522396 310.542363
8547754 105277 2734947 740 11316 timeline photo 3172.007271 8883.992729 -43.181506 25.802440 5.718115e+05 2.268413e+06 -616.960442 309.758151
5500321 0 1842355 0 252 merveill monde 66.302740 185.697260 -8.142649 4.865514 3.709143e+05 1.471441e+06 -609.027359 305.775177
4542421 325 1836805 4 136 instagram youtube 36.834856 103.165144 -5.410104 3.232724 3.698624e+05 1.467268e+06 -607.628736 305.072969
5833206 16442 1961324 311 478 météo mons 207.590721 581.409279 7.177208 -4.288630 3.981761e+05 1.579590e+06 -604.955411 303.730769
2169822 0 1768482 0 45 confinement day 11.839775 33.160225 -3.440897 2.056055 3.560418e+05 1.412440e+06 -596.692345 299.582120
3857503 2175 1747921 22 126 félix radu 38.939704 109.060296 -2.714623 1.622081 3.523402e+05 1.397756e+06 -589.918297 296.181065
1380473 42475 2085912 88 1645 bon fête 455.962890 1277.037110 -17.232142 10.296800 4.285001e+05 1.699887e+06 -589.712121 296.077550
8420461 0 1690743 0 50 tarmac comedy 13.155306 36.844694 -3.627024 2.167272 3.403909e+05 1.350352e+06 -583.430250 292.923603
8118663 28730 1912794 62 1141 souhaite bon 316.516651 886.483349 -14.305990 8.548323 3.908796e+05 1.550644e+06 -579.250656 290.825148
6446817 0 1652852 0 287 pensée grégory 75.511454 211.488546 -8.689733 5.192415 3.327624e+05 1.320090e+06 -576.855620 289.622669
1328466 0 1641157 0 292 bisous 3 76.826984 215.173016 -8.765100 5.237450 3.304079e+05 1.310749e+06 -574.811183 288.596216
2671803 0 1614130 0 234 dicton jour 61.566830 172.433170 -7.846453 4.688527 3.249667e+05 1.289163e+06 -570.058465 286.210013
4070103 0 1613833 0 299 grégory 3 78.668727 220.331273 -8.869539 5.299856 3.249069e+05 1.288926e+06 -570.006017 286.183680
4804353 40094 1942646 114 1316 joyeux anniversaire 376.241738 1053.758262 -13.519740 8.078511 3.991775e+05 1.583562e+06 -568.345498 285.349981
5245566 0 1559231 0 40 mah touré 10.524244 29.475756 -3.244109 1.938467 3.139140e+05 1.245317e+06 -560.280328 281.300691
683993 0 1544595 0 11 america' got 2.894167 8.105833 -1.701225 1.016541 3.109674e+05 1.233628e+06 -557.644543 279.977338
4831588 0 1542327 0 5 jury america' 1.315531 3.684469 -1.146966 0.685352 3.105108e+05 1.231816e+06 -557.234985 279.771711
1390057 0 1535706 0 384 bonjour souhaite 101.032747 282.967253 -10.051505 6.006121 3.091778e+05 1.226528e+06 -556.037631 279.170554
3992109 1067 1544509 5 25 got talent 7.893183 22.106817 -1.029793 0.615337 3.111649e+05 1.234411e+06 -555.908802 279.105872
5570953 3276 1536492 106 399 min max 132.868586 372.131414 -2.330954 1.392825 3.099956e+05 1.229772e+06 -550.888605 276.585375
2324982 176452 2826099 261 268 coronavirus confinement 139.183133 389.816867 10.325572 -6.169886 6.044922e+05 2.398059e+06 -550.540394 276.410549
6250714 0 1410106 0 165 page merveill 43.412508 121.587492 -6.588817 3.937045 2.838913e+05 1.126215e+06 -532.814487 267.510879
In [60]:
# Horizontal bar chart of the 50 bigrams selected in
# graph_interactions_nonmedia2: bar length = interaction-based Pearson
# residual for non-media posts.
fig = (
    px.bar(
        graph_interactions_nonmedia2,
        x="res_inter_nonmedia",
        y="bigramme",
        orientation="h",
        labels={
            "res_inter_nonmedia": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
            "bigramme": "Bigramme (dans les posts Facebook <b>non-médias</b>)",
        },
        title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
    )
    # Reversed y axis: the highest-ranked bigram is drawn at the top.
    .update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
    .update_traces(marker_color="cyan", opacity=0.75, textfont_size=12)
)
fig

trigrammes

In [61]:
# Load the two trigram files (media / non-media). The column names are supplied
# explicitly through `names`, so every line of each file is read as data.
media3, nonmedia3 = (
    pan.read_csv(csv_path, low_memory=False, names=["trigramme", "interactions"])
    for csv_path in (
        "belgique-trigrammes-media-nettoye.csv",
        "belgique-trigrammes-nonmedia-nettoye.csv",
    )
)
In [62]:
# Preview the media trigram table loaded above (columns: trigramme, interactions).
media3
Out[62]:
trigramme interactions
0 vie bienvenue émilie 175299
1 bienvenue émilie dylan 175299
2 émilie dylan faire 175299
3 dylan faire tour 175299
4 faire tour voiture 175299
... ... ...
7797166 recrute gardien timon 7
7797167 gardien timon wellenreuther 7
7797168 timon wellenreuther club 7
7797169 wellenreuther club bruxellois 7
7797170 club bruxellois renforcer 7

7797171 rows × 2 columns

In [63]:
# Preview the non-media trigram table loaded above (columns: trigramme, interactions).
nonmedia3
Out[63]:
trigramme interactions
0 moment tendresse bébé 224233
1 tendresse bébé bébé 224233
2 bébé bébé apprenon 224233
3 bébé apprenon changer 224233
4 apprenon changer regard 224233
... ... ...
22091456 web voter ici 5
22091457 voter ici visitgaume 5
22091458 ici visitgaume soleildegaume 5
22091459 visitgaume soleildegaume voter 5
22091460 soleildegaume voter logo 5

22091461 rows × 2 columns

In [64]:
# Aggregate the raw rows per distinct trigram:
#   - `len`  -> number of rows (occurrences) of the trigram,
#   - "sum"  -> total of the `interactions` values on those rows.
# "sum" is passed by name instead of the np.sum callable, which recent pandas
# versions flag with a FutureWarning when handed to an aggregation.
# Only "interactions" is listed in `values`: "trigramme" is the grouping key,
# so listing it as a value as well was redundant.
# The result keeps two-level column labels: (len, interactions) and
# (sum, interactions).
media3_table = pan.pivot_table(
    media3,
    index=["trigramme"],
    values=["interactions"],
    aggfunc=[len, "sum"],
)
nonmedia3_table = pan.pivot_table(
    nonmedia3,
    index=["trigramme"],
    values=["interactions"],
    aggfunc=[len, "sum"],
)
In [65]:
# Preview the per-trigram aggregates for media posts (len and sum of interactions).
media3_table
Out[65]:
len sum
interactions interactions
trigramme
#balades travail ecaussinne 1 45
#breath4life masque plongée 1 9
#cestbienlecas lidar enregistrer 1 422
#confinemer coronavirus coronavirus 1 49
#mobilité écologie train 1 26
... ... ...
静岡県 第3戦 8月22日・23 1 45
그切 ln24be françois 1 36
나4 oo 6ဝ 1 5
무를 oand egg 1 414
제이행 chadwick boseman 1 2645

5197497 rows × 2 columns

In [66]:
# Preview the per-trigram aggregates for non-media posts (len and sum of interactions).
nonmedia3_table
Out[66]:
len sum
interactions interactions
trigramme
#1 cafes colombe 1 79
#2 continue faire 1 6
#2 spectacle danse 2 18
#2020toussolidaire vivaforlife vivaforreyer 1 826
#21juillet belgique belgië 1 693
... ... ...
single musique eden 1 73
single streaming timeline 1 12
solidaire en 2021 1 12
𐀁мвя иσιя parfum 1 12
𐌾ᗩïᗩᙅ lumière temps 1 11

14650061 rows × 2 columns

In [67]:
# Tag every aggregated row with its corpus of origin, so the rows can still be
# told apart once the two tables are concatenated further down.
# Both tables carry two-level column labels, so the new column is displayed as
# "type" with an empty second level.
media3_table["type"] = "media"
nonmedia3_table["type"] = "non-media"
In [68]:
# Check that the "type" column is now present on the media table.
media3_table
Out[68]:
len sum type
interactions interactions
trigramme
#balades travail ecaussinne 1 45 media
#breath4life masque plongée 1 9 media
#cestbienlecas lidar enregistrer 1 422 media
#confinemer coronavirus coronavirus 1 49 media
#mobilité écologie train 1 26 media
... ... ... ...
静岡県 第3戦 8月22日・23 1 45 media
그切 ln24be françois 1 36 media
나4 oo 6ဝ 1 5 media
무를 oand egg 1 414 media
제이행 chadwick boseman 1 2645 media

5197497 rows × 3 columns

In [69]:
# Check that the "type" column is now present on the non-media table.
nonmedia3_table
Out[69]:
len sum type
interactions interactions
trigramme
#1 cafes colombe 1 79 non-media
#2 continue faire 1 6 non-media
#2 spectacle danse 2 18 non-media
#2020toussolidaire vivaforlife vivaforreyer 1 826 non-media
#21juillet belgique belgië 1 693 non-media
... ... ... ...
single musique eden 1 73 non-media
single streaming timeline 1 12 non-media
solidaire en 2021 1 12 non-media
𐀁мвя иσιя parfum 1 12 non-media
𐌾ᗩïᗩᙅ lumière temps 1 11 non-media

14650061 rows × 3 columns

In [70]:
# Stack the media table on top of the non-media table; the "type" column keeps
# track of which corpus each row came from.
# The former `names=` and `levels=0` arguments were removed: both only describe
# the hierarchical index that concat builds when `keys=` is supplied, no keys
# are passed here, and `levels` expects a list of sequences rather than 0.
tableau3 = pan.concat([media3_table, nonmedia3_table])
In [71]:
# Preview the stacked table (media rows first, then non-media rows).
tableau3
Out[71]:
len sum type
interactions interactions
trigramme
#balades travail ecaussinne 1 45 media
#breath4life masque plongée 1 9 media
#cestbienlecas lidar enregistrer 1 422 media
#confinemer coronavirus coronavirus 1 49 media
#mobilité écologie train 1 26 media
... ... ... ...
single musique eden 1 73 non-media
single streaming timeline 1 12 non-media
solidaire en 2021 1 12 non-media
𐀁мвя иσιя parfum 1 12 non-media
𐌾ᗩïᗩᙅ lumière temps 1 11 non-media

19847558 rows × 3 columns

In [72]:
# Flatten the stacked table: copy the trigram labels out of the index into a
# regular column, replace the two-level column labels with four flat names,
# then switch to a plain 0..n-1 index.
# NOTE: this cell is not idempotent. After one run the index no longer holds
# the trigrams, so re-run the concat cell before executing this one again.
tableau3["trigramme"] = tableau3.index
tableau3.columns = ["nb", "interactions", "media", "trigramme"]
tableau3 = tableau3.reset_index(drop=True)
tableau3
Out[72]:
nb interactions media trigramme
0 1 45 media #balades travail ecaussinne
1 1 9 media #breath4life masque plongée
2 1 422 media #cestbienlecas lidar enregistrer
3 1 49 media #confinemer coronavirus coronavirus
4 1 26 media #mobilité écologie train
... ... ... ... ...
19847553 1 73 non-media single musique eden
19847554 1 12 non-media single streaming timeline
19847555 1 12 non-media solidaire en 2021
19847556 1 12 non-media 𐀁мвя иσιя parfum
19847557 1 11 non-media 𐌾ᗩïᗩᙅ lumière temps

19847558 rows × 4 columns

In [73]:
# Build the contingency table: one row per trigram, with the aggregates split
# into a "media" and a "non-media" column (0 where a trigram is absent from one
# corpus).
# `values` now names the two columns that are really aggregated, "interactions"
# and "nb". The previous list named the index column "trigramme" and omitted
# "nb", although the summed "nb" columns are exactly what the next cell turns
# into obs_media / obs_nonmedia.
# "sum" is passed by name instead of the np.sum callable, which recent pandas
# versions flag with a FutureWarning.
# The `len` block is kept so the resulting column layout (len/sum x
# interactions/nb x media/non-media) stays what the next cell expects.
khi2_3 = pan.pivot_table(
    tableau3,
    index=["trigramme"],
    columns=["media"],
    values=["interactions", "nb"],
    aggfunc=[len, "sum"],
    fill_value=0,
)
In [74]:
# Preview the pivot: one row per trigram, len/sum aggregates of interactions and nb, split by media type.
khi2_3
Out[74]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
trigramme
#1 cafes colombe 0 1 0 1 0 79 0 1
#2 continue faire 0 1 0 1 0 6 0 1
#2 spectacle danse 0 1 0 1 0 18 0 2
#2020toussolidaire vivaforlife vivaforreyer 0 1 0 1 0 826 0 1
#21juillet belgique belgië 0 1 0 1 0 693 0 1
... ... ... ... ... ... ... ... ...
single musique eden 0 1 0 1 0 73 0 1
single streaming timeline 0 1 0 1 0 12 0 1
solidaire en 2021 0 1 0 1 0 12 0 1
𐀁мвя иσιя parfum 0 1 0 1 0 12 0 1
𐌾ᗩïᗩᙅ lumière temps 0 1 0 1 0 11 0 1

18981336 rows × 8 columns

In [75]:
# Flatten the three-level pivot into the working table used for the residuals.
# Only the "sum" aggregates are kept:
#   - summed interactions per corpus -> interactions_media / interactions_nonmedia
#   - summed `nb` (occurrence counts) -> obs_media / obs_nonmedia
# Columns are selected by their full label instead of being renamed by
# position. This removes the throw-away "a".."d" names for the unused `len`
# block and the intermediate `columns = get_level_values(0)` assignment, which
# was overwritten immediately. It also means an accidental re-run fails with a
# KeyError before anything is modified.
khi2_3 = pan.DataFrame(
    {
        "interactions_media": khi2_3[("sum", "interactions", "media")].to_numpy(),
        "interactions_nonmedia": khi2_3[("sum", "interactions", "non-media")].to_numpy(),
        "obs_media": khi2_3[("sum", "nb", "media")].to_numpy(),
        "obs_nonmedia": khi2_3[("sum", "nb", "non-media")].to_numpy(),
        # The trigram labels live in the pivot's index; keep them as a column.
        "trigramme": khi2_3.index.to_numpy(),
    }
)
khi2_3
Out[75]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme
0 0 79 0 1 #1 cafes colombe
1 0 6 0 1 #2 continue faire
2 0 18 0 2 #2 spectacle danse
3 0 826 0 1 #2020toussolidaire vivaforlife vivaforreyer
4 0 693 0 1 #21juillet belgique belgië
... ... ... ... ... ...
18981331 0 73 0 1 single musique eden
18981332 0 12 0 1 single streaming timeline
18981333 0 12 0 1 solidaire en 2021
18981334 0 12 0 1 𐀁мвя иσιя parfum
18981335 0 11 0 1 𐌾ᗩïᗩᙅ lumière temps

18981336 rows × 5 columns

In [76]:
# Total observed occurrences per corpus, displayed as a (media, non-media) pair.
khi2_3["obs_media"].sum(), khi2_3["obs_nonmedia"].sum()
Out[76]:
(7797171, 22091461)
In [77]:
# Sanity check: the summed occurrence counts in khi2_3 must equal the number of
# rows read from each raw file, otherwise rows were lost or duplicated while
# aggregating and pivoting.
print("Nb de lignes fichier media3 = ", media3.shape[0])
print("Somme observée média khi2_3 = ", khi2_3.obs_media.sum())

print("Nb de lignes  fichier nonmedia3 = ", nonmedia3.shape[0])
print("Somme observée nonmédia khi2_3 = ", khi2_3.obs_nonmedia.sum())

# Enforce the check so that a mismatch stops a "Run All" instead of relying on
# someone comparing the printed numbers by eye.
assert khi2_3.obs_media.sum() == media3.shape[0], "media trigram counts do not match the raw file"
assert khi2_3.obs_nonmedia.sum() == nonmedia3.shape[0], "non-media trigram counts do not match the raw file"
Nb de lignes fichier media3 =  7797171
Somme observée média khi2_3 =  7797171
Nb de lignes  fichier nonmedia3 =  22091461
Somme observée nonmédia khi2_3 =  22091461
In [78]:
# Expected occurrence counts under independence between trigram and corpus:
#   expected = (row total) * (column total) / (grand total)
# where the row total is the trigram's count over both corpora and the column
# total is the corpus-wide count.
khi2_3["exp_media"] = (
    khi2_3["obs_media"]
    .add(khi2_3["obs_nonmedia"])
    .mul(khi2_3["obs_media"].sum())
    .div(khi2_3["obs_media"].sum() + khi2_3["obs_nonmedia"].sum())
)
khi2_3["exp_nonmedia"] = (
    khi2_3["obs_media"]
    .add(khi2_3["obs_nonmedia"])
    .mul(khi2_3["obs_nonmedia"].sum())
    .div(khi2_3["obs_media"].sum() + khi2_3["obs_nonmedia"].sum())
)
khi2_3
Out[78]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia
0 0 79 0 1 #1 cafes colombe 0.260874 0.739126
1 0 6 0 1 #2 continue faire 0.260874 0.739126
2 0 18 0 2 #2 spectacle danse 0.521748 1.478252
3 0 826 0 1 #2020toussolidaire vivaforlife vivaforreyer 0.260874 0.739126
4 0 693 0 1 #21juillet belgique belgië 0.260874 0.739126
... ... ... ... ... ... ... ...
18981331 0 73 0 1 single musique eden 0.260874 0.739126
18981332 0 12 0 1 single streaming timeline 0.260874 0.739126
18981333 0 12 0 1 solidaire en 2021 0.260874 0.739126
18981334 0 12 0 1 𐀁мвя иσιя parfum 0.260874 0.739126
18981335 0 11 0 1 𐌾ᗩïᗩᙅ lumière temps 0.260874 0.739126

18981336 rows × 7 columns

In [79]:
# Pearson residuals on occurrence counts: (observed - expected) / sqrt(expected).
# Positive values mean the trigram is over-represented in that corpus.
khi2_3["res_media"] = (
    khi2_3["obs_media"]
    .sub(khi2_3["exp_media"])
    .div(np.sqrt(khi2_3["exp_media"]))
)
khi2_3["res_nonmedia"] = (
    khi2_3["obs_nonmedia"]
    .sub(khi2_3["exp_nonmedia"])
    .div(np.sqrt(khi2_3["exp_nonmedia"]))
)
khi2_3
Out[79]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 79 0 1 #1 cafes colombe 0.260874 0.739126 -0.510758 0.303439
1 0 6 0 1 #2 continue faire 0.260874 0.739126 -0.510758 0.303439
2 0 18 0 2 #2 spectacle danse 0.521748 1.478252 -0.722321 0.429128
3 0 826 0 1 #2020toussolidaire vivaforlife vivaforreyer 0.260874 0.739126 -0.510758 0.303439
4 0 693 0 1 #21juillet belgique belgië 0.260874 0.739126 -0.510758 0.303439
... ... ... ... ... ... ... ... ... ...
18981331 0 73 0 1 single musique eden 0.260874 0.739126 -0.510758 0.303439
18981332 0 12 0 1 single streaming timeline 0.260874 0.739126 -0.510758 0.303439
18981333 0 12 0 1 solidaire en 2021 0.260874 0.739126 -0.510758 0.303439
18981334 0 12 0 1 𐀁мвя иσιя parfum 0.260874 0.739126 -0.510758 0.303439
18981335 0 11 0 1 𐌾ᗩïᗩᙅ lumière temps 0.260874 0.739126 -0.510758 0.303439

18981336 rows × 9 columns

In [80]:
# Keep the 50 trigrams with the highest count-based residual for media posts
# (descending sort, then the first 50 rows).
graph_media3 = khi2_3.sort_values("res_media", ascending=False).iloc[:50]
graph_media3
Out[80]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
16768707 190149 5485 1483 154 television local region 427.050958 1209.949042 51.097907 -30.357040
17639945 190149 5485 1483 154 vedia television local 427.050958 1209.949042 51.097907 -30.357040
9994421 190149 5485 1483 154 local region verviers 427.050958 1209.949042 51.097907 -30.357040
919457 72074 13859 1284 134 actualité région bruxelles 369.919523 1048.080477 47.525950 -28.234956
2898345 72074 13859 1284 134 bruxelles capitale bx1be 369.919523 1048.080477 47.525950 -28.234956
16477661 72074 13859 1284 134 suivre actualité région 369.919523 1048.080477 47.525950 -28.234956
17401137 72074 13859 1284 134 télévision bx1 radio 369.919523 1048.080477 47.525950 -28.234956
2990877 72074 13859 1284 134 bx1 télévision bx1 369.919523 1048.080477 47.525950 -28.234956
3146471 72074 13859 1284 134 capitale bx1be également 369.919523 1048.080477 47.525950 -28.234956
2990991 72074 13859 1284 134 bx1be également bx1 369.919523 1048.080477 47.525950 -28.234956
18609877 72074 13859 1284 134 également bx1 télévision 369.919523 1048.080477 47.525950 -28.234956
2990848 70905 13859 1272 134 bx1 suivre actualité 366.789033 1039.210967 47.265215 -28.080054
15021172 321975 42982 1533 523 rtc télé liège 536.357220 1519.642780 43.034074 -25.566352
5122140 254875 8005 1116 160 dernier 24 heure 332.875395 943.124605 42.922995 -25.500361
18448921 409367 331747 1195 224 yves van laethem 370.180397 1048.819603 42.869880 -25.468805
8887323 280842 14583 924 40 institut santé public 251.482666 712.517334 42.408163 -25.194501
16678563 210812 13561 1014 121 t on apprendre 296.092142 838.907858 41.721053 -24.786293
10421499 383013 58659 1149 236 marc van ranst 361.310676 1023.689324 41.439505 -24.619026
4321599 1003102 559225 3831 3824 conseil national sécurité 1996.991498 5658.008502 41.040556 -24.382012
18399250 334820 15156 789 55 wout van aert 220.177769 623.822231 38.334502 -22.774358
15515939 160911 12762 749 38 santé public sciensano 205.307944 581.692056 37.944612 -22.542725
9035126 103459 5176 767 67 invité martin buxant 217.569028 616.430972 37.248999 -22.129465
11651603 131552 232 673 11 nombre moyen infection 178.437908 505.562092 37.023482 -21.995487
15179723 85014 51095 1362 641 région bruxelles capitale 522.530891 1480.469109 36.723895 -21.817503
1354814 160618 23130 829 152 américain donald trump 255.917526 725.082474 35.823426 -21.282538
16906741 253368 56535 1031 337 tester positif coronavirus 356.875816 1011.124184 35.684642 -21.200088
18720987 237912 4790 683 50 épidémie coronavirus belgique 191.220740 541.779260 35.563354 -21.128031
11651734 116313 2611 650 45 nombre nouveau cas 181.307523 513.692477 34.808080 -20.679326
7754060 309852 383296 1604 1076 georges louis bouchez 699.142680 1980.857320 34.221355 -20.330755
13955128 143579 22518 734 123 président américain donald 223.569133 633.430867 34.137445 -20.280904
16278853 136644 946 500 6 sport foot magazine 132.002312 373.997688 32.029807 -19.028766
2575168 23623 0 473 0 bilan jour belgique 123.393466 349.606534 31.472661 -18.697768
18878176 54812 4435 503 21 évolution pandémie covid-19 136.698046 387.301954 31.329833 -18.612915
11716317 105715 5090 541 56 nouveau cas coronavirus 155.741858 441.258142 30.870890 -18.340259
7538015 312212 0 433 0 from rtl info' 112.958500 320.041500 30.112502 -17.889704
15022690 312212 0 433 0 rtl info' post 112.958500 320.041500 30.112502 -17.889704
5142326 49865 132 428 7 dernier évolution pandémie 113.480248 321.519752 29.524856 -17.540586
8887065 121244 3086 511 65 institut royal météorologique 150.263501 425.736499 29.428175 -17.483148
16906747 148224 84901 824 361 tester positif covid-19 309.135849 875.864151 29.283162 -17.396997
10962486 283986 136586 944 501 ministre sophie wilmès 376.963124 1068.036876 29.205313 -17.350747
11155652 100218 20318 577 127 mondial santé om 183.655391 520.344609 29.024942 -17.243589
5040933 139402 8394 448 29 deceuninck quick step 124.436962 352.563038 29.005757 -17.232192
12123931 141289 102433 826 375 organisation mondial santé 313.309835 887.690165 28.964629 -17.207758
4791124 76781 1444 418 23 crise spf santé 115.045493 325.954507 28.245067 -16.780269
12885351 279752 7859 380 13 photo from rtl 102.523535 290.476465 27.404026 -16.280610
11716323 71481 6723 465 74 nouveau cas covid-19 140.611158 398.388842 27.356215 -16.252206
14325887 67468 661 376 13 rapport semaine précédent 101.480038 287.519962 27.251074 -16.189742
17607267 187714 1076 422 44 van der poel 121.567347 344.432653 27.248253 -16.188066
9336973 21009 0 348 0 jour belgique coronavirus 90.784199 257.215801 26.995571 -16.037949
7181215 154616 60364 526 146 finale ligue champion 175.307418 496.692582 26.486613 -15.735580
In [81]:
# Horizontal bar chart of the 50 trigrams selected in graph_media3:
# bar length = count-based Pearson residual for media posts.
fig = (
    px.bar(
        graph_media3,
        x="res_media",
        y="trigramme",
        orientation="h",
        labels={
            "res_media": "Résiduel de Pearson",
            "trigramme": "Trigramme (dans les posts Facebook <b>médias</b>)",
        },
        title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays),
    )
    # Reversed y axis: the highest-ranked trigram is drawn at the top.
    .update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
    .update_traces(marker_color="DarkOliveGreen", opacity=0.75, textfont_size=12)
)
fig
In [82]:
# Keep the 50 trigrams with the highest count-based residual for non-media
# posts (descending sort, then the first 50 rows).
graph_nonmedia3 = khi2_3.sort_values("res_nonmedia", ascending=False).iloc[:50]
graph_nonmedia3
Out[82]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
3178379 1064 273967 3 4481 caroline taquin bourgmestre 1169.759618 3314.240382 -34.114034 20.266996
2786711 808 219791 6 3436 bourgmestre caroline taquin 897.928770 2544.071230 -29.765229 17.683391
16718669 0 201369 0 3050 taquin bourgmestre caroline 795.666110 2254.333890 -28.207554 16.757983
9061281 36162 742336 985 7535 is re-share of 2222.647625 6297.352375 -26.251958 15.596172
14370650 36162 742336 985 7535 re-share of post 2222.647625 6297.352375 -26.251958 15.596172
16948544 36162 742025 985 7524 this is re-share 2219.778009 6289.221991 -26.208014 15.570065
16718892 895 115784 1 1952 taquin caroline taquin 509.487184 1443.512816 -22.527520 13.383500
3178384 895 115286 1 1947 caroline taquin caroline 508.182814 1439.817186 -22.498551 13.366289
17607282 4116 232262 19 1998 van der valk 526.183129 1490.816871 -22.110387 13.135683
12884750 0 134947 0 1812 photo from commune 472.703931 1339.296069 -21.741755 12.916681
12885493 0 192240 0 1417 photo from ville 369.658648 1047.341352 -19.226509 11.422384
514077 4 306789 1 1401 3 3 3 365.745536 1036.254464 -19.072186 11.330701
11305102 0 142162 0 1307 mp page fb 340.962494 966.037506 -18.465170 10.970075
6451200 0 141902 0 1305 envoyer mp page 340.440745 964.559255 -18.451036 10.961679
18161030 17907 201183 25 1441 vouloir prendre contact 382.441481 1083.558519 -18.277738 10.858723
2125639 1014 113137 47 1505 auvio voir vidéo 404.876657 1147.123343 -17.785743 10.566431
16395658 1014 113091 47 1503 streaming auvio voir 404.354908 1145.645092 -17.771267 10.557831
7938601 1014 112134 47 1476 gratuitement vidéo programme 397.311307 1125.688693 -17.574731 10.441070
17979652 1041 112134 48 1476 visionner gratuitement vidéo 397.572181 1126.427819 -17.531895 10.415621
10355426 0 153344 0 1138 mamma africa news 296.874765 841.125235 -17.230054 10.236299
743543 0 152900 0 1137 abonner vous mamma 296.613891 840.386109 -17.222482 10.231800
18185100 0 152892 0 1136 vous mamma africa 296.353017 839.646983 -17.214907 10.227300
2125624 977 100542 46 1407 auvio visionner gratuitement 379.050117 1073.949883 -17.106504 10.162898
1029707 0 150465 0 1117 africa news info 291.396408 825.603592 -17.070337 10.141412
13470481 2542 883621 64 1476 prendre soin proche 401.746167 1138.253833 -16.850569 10.010848
12884808 0 1155683 0 982 photo from disneyland 256.178400 725.821600 -16.005574 9.508841
7787734 0 489648 0 968 ginny carine anguima 252.526162 715.473838 -15.891072 9.440815
7538589 0 102368 0 961 from voiture abandonner 250.700043 710.299957 -15.833510 9.406618
18090508 0 102368 0 961 voiture abandonner monde' 250.700043 710.299957 -15.833510 9.406618
727761 0 102368 0 961 abandonner monde' post 250.700043 710.299957 -15.833510 9.406618
12884659 0 81341 0 939 photo from boutique 244.960812 694.039188 -15.651224 9.298323
2191922 0 127141 0 938 avoir recueillir vouloir 244.699938 693.300062 -15.642888 9.293370
14451028 0 127206 0 938 recueillir vouloir prendre 244.699938 693.300062 -15.642888 9.293370
13446978 4 99909 1 931 prendre contact vite 243.134693 688.865307 -15.528645 9.225499
12885505 0 98787 0 923 photo from voiture 240.786826 682.213174 -15.517307 9.218763
8489512 657 304517 5 949 ici photo from 248.873924 705.126076 -15.458796 9.184002
6625469 0 478957 0 910 ev ginny carine 237.395462 672.604538 -15.407643 9.153612
3178414 0 48366 0 880 caroline taquin député 229.569238 650.430762 -15.151542 9.001464
16718954 0 46981 0 862 taquin député bourgmestre 224.873504 637.126496 -14.995783 8.908928
5380191 0 1073806 0 801 disneyland paris bon 208.960182 592.039818 -14.455455 8.587922
12441492 5406 113426 40 1044 parti socialiste ps 282.787562 801.212438 -14.437642 8.577339
8656498 683 3323750 9 852 in het nederlands 224.612630 636.387370 -14.386565 8.546994
12050652 115 3338395 5 826 ook in het 216.786406 614.213594 -14.384079 8.545517
18115245 115 3342756 5 823 volg ons ook 216.003783 611.996217 -14.356863 8.529349
12046128 115 3336266 5 821 ons ook in 215.482035 610.517965 -14.338691 8.518553
8221272 115 3289625 5 810 het nederlands op 212.612419 602.387581 -14.238329 8.458928
7537885 0 124176 0 774 from poils moustaches' 201.916580 572.083420 -14.209735 8.441941
13118498 0 124176 0 774 poils moustaches' post 201.916580 572.083420 -14.209735 8.441941
12266642 115 3231585 5 798 pairi daiza nl 209.481930 593.518070 -14.128031 8.393401
12054369 115 3228050 5 794 op pairi daiza 208.438433 590.561567 -14.091074 8.371445
In [83]:
# Horizontal bar chart of the 50 trigrams selected in graph_nonmedia3:
# bar length = count-based Pearson residual for non-media posts.
fig = (
    px.bar(
        graph_nonmedia3,
        x="res_nonmedia",
        y="trigramme",
        orientation="h",
        labels={
            "res_nonmedia": "Résiduel de Pearson",
            "trigramme": "Trigramme (dans les posts Facebook <b>non-médias</b>)",
        },
        title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays),
    )
    # Reversed y axis: the highest-ranked trigram is drawn at the top.
    .update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
    .update_traces(marker_color="Olive", opacity=0.75, textfont_size=12)
)
fig
In [84]:
# Same expected-value formula as for the occurrence counts, applied to the
# summed interactions:
#   expected = (trigram's interactions over both corpora)
#              * (corpus-wide interactions) / (all interactions)
khi2_3["exp_inter_media"] = (
    khi2_3["interactions_media"]
    .add(khi2_3["interactions_nonmedia"])
    .mul(khi2_3["interactions_media"].sum())
    .div(khi2_3["interactions_media"].sum() + khi2_3["interactions_nonmedia"].sum())
)
khi2_3["exp_inter_nonmedia"] = (
    khi2_3["interactions_media"]
    .add(khi2_3["interactions_nonmedia"])
    .mul(khi2_3["interactions_nonmedia"].sum())
    .div(khi2_3["interactions_media"].sum() + khi2_3["interactions_nonmedia"].sum())
)
khi2_3
Out[84]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 79 0 1 #1 cafes colombe 0.260874 0.739126 -0.510758 0.303439 15.834680 63.165320
1 0 6 0 1 #2 continue faire 0.260874 0.739126 -0.510758 0.303439 1.202634 4.797366
2 0 18 0 2 #2 spectacle danse 0.521748 1.478252 -0.722321 0.429128 3.607902 14.392098
3 0 826 0 1 #2020toussolidaire vivaforlife vivaforreyer 0.260874 0.739126 -0.510758 0.303439 165.562607 660.437393
4 0 693 0 1 #21juillet belgique belgië 0.260874 0.739126 -0.510758 0.303439 138.904221 554.095779
... ... ... ... ... ... ... ... ... ... ... ...
18981331 0 73 0 1 single musique eden 0.260874 0.739126 -0.510758 0.303439 14.632046 58.367954
18981332 0 12 0 1 single streaming timeline 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732
18981333 0 12 0 1 solidaire en 2021 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732
18981334 0 12 0 1 𐀁мвя иσιя parfum 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732
18981335 0 11 0 1 𐌾ᗩïᗩᙅ lumière temps 0.260874 0.739126 -0.510758 0.303439 2.204829 8.795171

18981336 rows × 11 columns

In [85]:
# Pearson residuals on interactions: (observed - expected) / sqrt(expected),
# using the summed interactions and their expected values per corpus.
khi2_3["res_inter_media"] = (
    khi2_3["interactions_media"]
    .sub(khi2_3["exp_inter_media"])
    .div(np.sqrt(khi2_3["exp_inter_media"]))
)
khi2_3["res_inter_nonmedia"] = (
    khi2_3["interactions_nonmedia"]
    .sub(khi2_3["exp_inter_nonmedia"])
    .div(np.sqrt(khi2_3["exp_inter_nonmedia"]))
)
khi2_3
Out[85]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 79 0 1 #1 cafes colombe 0.260874 0.739126 -0.510758 0.303439 15.834680 63.165320 -3.979281 1.992370
1 0 6 0 1 #2 continue faire 0.260874 0.739126 -0.510758 0.303439 1.202634 4.797366 -1.096647 0.549075
2 0 18 0 2 #2 spectacle danse 0.521748 1.478252 -0.722321 0.429128 3.607902 14.392098 -1.899448 0.951027
3 0 826 0 1 #2020toussolidaire vivaforlife vivaforreyer 0.260874 0.739126 -0.510758 0.303439 165.562607 660.437393 -12.867113 6.442381
4 0 693 0 1 #21juillet belgique belgië 0.260874 0.739126 -0.510758 0.303439 138.904221 554.095779 -11.785764 5.900965
... ... ... ... ... ... ... ... ... ... ... ... ... ...
18981331 0 73 0 1 single musique eden 0.260874 0.739126 -0.510758 0.303439 14.632046 58.367954 -3.825186 1.915216
18981332 0 12 0 1 single streaming timeline 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732 -1.550893 0.776510
18981333 0 12 0 1 solidaire en 2021 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732 -1.550893 0.776510
18981334 0 12 0 1 𐀁мвя иσιя parfum 0.260874 0.739126 -0.510758 0.303439 2.405268 9.594732 -1.550893 0.776510
18981335 0 11 0 1 𐌾ᗩïᗩᙅ lumière temps 0.260874 0.739126 -0.510758 0.303439 2.204829 8.795171 -1.484867 0.743452

18981336 rows × 13 columns

In [86]:
# Keep the 50 trigrams with the highest interaction-based residual for media
# posts (descending sort, then the first 50 rows).
graph_interactions_media3 = khi2_3.sort_values("res_inter_media", ascending=False).iloc[:50]
graph_interactions_media3
Out[86]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
11379766 713970 1157 4 2 mère fils 15 1.565245 4.434755 1.946096 -1.156167 143339.334986 5.717877e+05 1507.205304 -754.636340
7149196 714035 1710 6 4 fils 15 an 2.608741 7.391259 2.099643 -1.247388 143463.206283 5.722818e+05 1506.399046 -754.232659
4321599 1003102 559225 3831 3824 conseil national sécurité 1996.991498 5658.008502 41.040556 -24.382012 313151.248954 1.249176e+06 1232.937374 -617.314274
8436613 433985 1950 5 3 héberger ami besoin 2.086993 5.913007 2.016420 -1.197946 87378.371950 3.485566e+05 1172.559498 -587.083927
16019389 408314 3903 27 6 soin intensif cause 8.608846 24.391154 6.268108 -3.723855 82624.359939 3.295926e+05 1133.052126 -567.303146
8905166 387128 1599 15 4 intensif cause coronavirus 4.956609 14.043391 4.511158 -2.680059 77916.048018 3.108110e+05 1107.752692 -554.636077
1413961 379157 3328 19 10 an soin intensif 7.565350 21.434650 4.157271 -2.469816 76664.907831 3.058201e+05 1092.485589 -546.992055
2900193 359272 73 4 2 bruxelles contracter covid-19 1.565245 4.434755 1.946096 -1.156167 72026.749557 2.873183e+05 1070.301042 -535.884567
16025965 356985 0 2 0 soin témoignage mère 0.521748 1.478252 2.046529 -1.215834 71553.713536 2.854313e+05 1067.051759 -534.257697
13470909 356985 0 2 0 prendre soin témoignage 0.521748 1.478252 2.046529 -1.215834 71553.713536 2.854313e+05 1067.051759 -534.257697
8460621 357060 92 4 3 hôpital reine fabiola 1.826119 5.173881 1.608686 -0.955713 71587.186848 2.855648e+05 1066.957466 -534.210485
3580558 356985 55 2 1 choquet mère fils 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
10827270 356985 55 2 1 mettre masque plaire 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
13020477 356985 55 2 1 plaire appel déchirant 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
1419574 356985 55 2 1 an venir transférer 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
5716597 356985 55 2 1 déchirant françoise choquet 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
1653846 356985 55 2 1 appel déchirant françoise 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
10548731 356985 55 2 1 masque plaire appel 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
7509023 356985 55 2 1 françoise choquet mère 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
17185136 356985 55 2 1 transférer hôpital reine 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
6776722 356985 55 2 1 fabiola bruxelles contracter 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
4445962 356985 55 2 1 contracter covid-19 prendre 0.782622 2.217378 1.376097 -0.817533 71564.737681 2.854753e+05 1066.928360 -534.195912
17706313 356985 75 2 2 venir transférer hôpital 1.043497 2.956503 0.936356 -0.556285 71568.746461 2.854913e+05 1066.883493 -534.173449
171019 357024 511 3 5 15 an venir 2.086993 5.913007 0.631995 -0.375465 71663.954982 2.858710e+05 1065.964591 -533.713367
14500059 356985 587 2 8 reine fabiola bruxelles 2.608741 7.391259 -0.376892 0.223910 71671.371225 2.859006e+05 1065.736060 -533.598945
170940 356985 1107 2 2 15 an soin 1.043497 2.956503 0.936356 -0.556285 71775.599500 2.863164e+05 1064.572937 -533.016586
17405752 356985 1388 2 2 témoignage mère fils 1.043497 2.956503 0.936356 -0.556285 71831.922857 2.865411e+05 1063.945339 -532.702356
4724673 356985 6402 2 34 covid-19 prendre soin 9.391469 26.608531 -2.411926 1.432915 72836.923963 2.905501e+05 1052.855867 -527.150015
18399250 334820 15156 789 55 wout van aert 220.177769 623.822231 38.334502 -22.774358 70148.836642 2.798272e+05 999.301156 -500.335930
15022690 312212 0 433 0 rtl info' post 112.958500 320.041500 30.112502 -17.889704 62579.458550 2.496325e+05 997.896036 -499.632406
7538015 312212 0 433 0 from rtl info' 112.958500 320.041500 30.112502 -17.889704 62579.458550 2.496325e+05 997.896036 -499.632406
10421499 383013 58659 1149 236 marc van ranst 361.310676 1023.689324 41.439505 -24.619026 88528.290446 3.531437e+05 989.741341 -495.549466
17831670 325573 36982 9 12 vie annie cordy 5.478357 15.521643 1.504597 -0.893874 72670.158721 2.898848e+05 938.157282 -469.722059
1507969 346571 61213 60 53 annie cordy décéder 29.478777 83.521223 5.621435 -3.339670 81735.813888 3.260482e+05 926.338020 -463.804322
12885351 279752 7859 380 13 photo from rtl 102.523535 290.476465 27.404026 -16.280610 57648.458910 2.299625e+05 925.042384 -463.155615
15021172 321975 42982 1533 523 rtc télé liège 536.357220 1519.642780 43.034074 -25.566352 73151.613180 2.918054e+05 919.981816 -460.621860
8887323 280842 14583 924 40 institut santé public 251.482666 712.517334 42.408163 -25.194501 59214.689193 2.362103e+05 910.769650 -456.009459
5122140 254875 8005 1116 160 dernier 24 heure 332.875395 943.124605 42.922995 -25.500361 52691.402200 2.101886e+05 880.797437 -441.002797
18720987 237912 4790 683 50 épidémie coronavirus belgique 191.220740 541.779260 35.563354 -21.128031 48646.944221 1.940551e+05 858.109390 -429.643213
18494706 352747 111437 143 134 âge 92 an 72.262135 204.737865 8.321403 -4.943709 93040.573033 3.711434e+05 851.425218 -426.296543
1394768 237861 10719 10 8 an julie melissa 4.695734 13.304266 2.447786 -1.454219 49825.124615 1.987549e+05 842.396433 -421.775958
17941739 219809 991 6 2 virginie dénoncer voisin 2.086993 5.913007 2.708633 -1.609187 44256.929419 1.765431e+05 834.478817 -417.811720
5933956 219454 944 3 1 dénoncer voisin héberger 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
8968315 219454 944 3 1 intervention policier suivre 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
16483244 219454 944 3 1 suivre profondément marquer 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
1304788 219454 944 3 1 ami besoin carolo 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
5933958 219454 944 3 1 dénoncer voisin intervention 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
18087774 219454 944 3 1 voisin intervention policier 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
2509420 219454 944 3 1 besoin carolo dénoncer 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
3179108 219454 944 3 1 carolo dénoncer voisin 1.043497 2.956503 1.915293 -1.137867 44176.352945 1.762216e+05 833.933855 -417.538865
In [87]:
# Horizontal bar chart of graph_interactions_media3: one bar per trigram,
# bar length given by res_inter_media. `pays` is interpolated into the title.
fig = px.bar(
    graph_interactions_media3,
    x="res_inter_media",
    y="trigramme",
    orientation="h",
    labels={
        "res_inter_media": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
        "trigramme": "Trigramme (dans les posts Facebook <b>médias</b>)",
    },
    title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=1000,
)
fig.update_traces(
    marker_color="green",
    opacity=0.75,
    textfont_size=12,
)
fig
In [88]:
# Keep the 50 rows of khi2_3 with the largest res_inter_nonmedia values
# (descending sort, then the first 50 rows).
graph_interactions_nonmedia3 = (
    khi2_3
    .sort_values(by=["res_inter_nonmedia"], ascending=False)
    .head(50)
)
graph_interactions_nonmedia3
Out[88]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
16935230 54332 4207760 181 596 the voice kids 202.699202 574.300798 -1.524115 0.905470 854289.424018 3.407803e+06 -865.494591 433.340879
18115245 115 3342756 5 823 volg ons ook 216.003783 611.996217 -14.356863 8.529349 670041.693412 2.672829e+06 -818.420255 409.771425
12050652 115 3338395 5 826 ook in het 216.786406 614.213594 -14.384079 8.545517 669167.578968 2.669342e+06 -817.886055 409.503959
12046128 115 3336266 5 821 ons ook in 215.482035 610.517965 -14.338691 8.518553 668740.844354 2.667640e+06 -817.625137 409.373321
8656498 683 3323750 9 852 in het nederlands 224.612630 636.387370 -14.386565 8.546994 666345.999279 2.658087e+06 -815.463487 408.291014
8221272 115 3289625 5 810 het nederlands op 212.612419 602.387581 -14.238329 8.458928 659392.169332 2.630348e+06 -811.888040 406.500838
12266642 115 3231585 5 798 pairi daiza nl 209.481930 593.518070 -14.128031 8.393401 647758.690240 2.583941e+06 -804.691687 402.897726
12054369 115 3228050 5 794 op pairi daiza 208.438433 590.561567 -14.091074 8.371445 647050.138403 2.581115e+06 -804.251303 402.677232
11538468 115 3203780 5 789 nederlands op pairi 207.134063 586.865937 -14.044741 8.343918 642185.484069 2.561710e+06 -801.221258 401.160132
12433340 0 3118599 0 34 partenaire touré collection 8.869721 25.130279 -2.978208 1.769340 625088.839813 2.493510e+06 -790.625600 395.855036
12266932 0 2870770 0 754 pairi daiza' post 196.699097 557.300903 -14.024946 8.332158 575414.244880 2.295356e+06 -758.560640 379.800565
7537804 0 2870770 0 754 from pairi daiza' 196.699097 557.300903 -14.024946 8.332158 575414.244880 2.295356e+06 -758.560640 379.800565
12885242 0 2845866 0 749 photo from pairi 195.394727 553.605273 -13.978366 8.304485 570422.512225 2.275443e+06 -755.263207 378.149587
3874650 0 2700135 0 30 collection likeetpartage cesttouss 7.826224 22.173776 -2.797539 1.662005 541212.337491 2.158923e+06 -735.671352 368.340222
17128108 0 2699866 0 29 touré collection likeetpartage 7.565350 21.434650 -2.750518 1.634071 541158.419402 2.158708e+06 -735.634705 368.321874
11616691 0 2263822 0 617 nl photo from 160.959341 456.040659 -12.686975 7.537276 453758.199603 1.810064e+06 -673.615766 337.269869
4978524 0 2262072 0 598 daiza nl photo 156.002732 441.997268 -12.490105 7.420316 453407.431367 1.808665e+06 -673.355353 337.139485
4524158 0 1768456 0 44 coronavirus confinement day 11.478462 32.521538 -3.387988 2.012789 354467.537923 1.413988e+06 -595.371764 298.094207
12674579 0 1550146 0 270 pensée grégory 3 70.436016 199.563984 -8.392617 4.986016 310709.701594 1.239436e+06 -557.413403 279.088993
1301993 0 1542369 0 10 america' got talent 2.608741 7.391259 -1.615160 0.959559 309150.887553 1.233218e+06 -556.013388 278.388025
9474140 0 1542327 0 5 jury america' got 1.304371 3.695629 -1.142090 0.678511 309142.469116 1.233185e+06 -556.005818 278.384235
8867994 0 1470408 0 15 instagram partenaire touré 3.913112 11.086888 -1.978159 1.175215 294727.097255 1.175681e+06 -542.887739 271.816199
2731876 0 1435319 0 264 bonjour souhaite bon 68.870771 195.129229 -8.298842 4.930305 287693.893467 1.147625e+06 -536.371041 268.553380
18423005 0 1372935 0 13 yao pawa gang 3.391364 9.608636 -1.841566 1.094066 275189.707394 1.097745e+06 -524.585272 262.652413
12254282 5537 1353910 113 324 page météo mons 114.001997 322.998003 -0.093845 0.055753 272486.186270 1.086961e+06 -511.394857 256.048159
12253988 0 1263899 0 147 page merveill monde 38.348498 108.651502 -6.192616 3.679006 253334.641469 1.010564e+06 -503.323595 252.006993
8869151 0 1260098 0 219 instagram tweeter samuelnito5 57.131435 161.868565 -7.558534 4.490491 252572.772861 1.007525e+06 -502.566188 251.627769
514804 0 1196944 0 240 3 bisous 3 62.609792 177.390208 -7.912635 4.700861 239914.248764 9.570298e+05 -489.810421 245.241138
11122716 0 1167195 0 157 monde dicton jour 40.957239 116.042761 -6.399784 3.802083 233951.389193 9.332436e+05 -483.685217 242.174335
6573194 0 1157241 0 11 eudoxie yao pawa 2.869615 8.130385 -1.693994 1.006394 231956.219467 9.252848e+05 -481.618334 241.139476
8014706 0 1155958 0 233 grégory 3 bisous 60.783673 172.216327 -7.796388 4.631800 231699.056240 9.242589e+05 -481.351282 241.005766
12884808 0 1155683 0 982 photo from disneyland 256.178400 725.821600 -16.005574 9.508841 231643.935518 9.240391e+05 -481.294022 240.977097
516727 0 1135904 0 140 3 pensée grégory 36.522379 103.477621 -6.043375 3.590342 227679.452696 9.082245e+05 -477.157681 238.906090
16484010 0 1096392 0 227 suivre réseau social 59.218428 167.781572 -7.695351 4.571774 219759.707247 8.766323e+05 -468.785353 234.714184
134874 0 1079127 0 3 13 an impressionne 0.782622 2.217378 -0.884659 0.525572 216299.128051 8.628279e+05 -465.079701 232.858817
1392041 0 1079127 0 3 an impressionne jury 0.782622 2.217378 -0.884659 0.525572 216299.128051 8.628279e+05 -465.079701 232.858817
8641847 0 1079127 0 3 impressionne jury america' 0.782622 2.217378 -0.884659 0.525572 216299.128051 8.628279e+05 -465.079701 232.858817
7829680 0 1079127 0 3 got talent bohemian 0.782622 2.217378 -0.884659 0.525572 216299.128051 8.628279e+05 -465.079701 232.858817
17369370 0 1077628 0 172 tweeter samuelnito5 lapersévérenceestaurendezvous 44.870351 127.129649 -6.698534 3.979569 215998.670002 8.616293e+05 -464.756571 232.697030
5380191 0 1073806 0 801 disneyland paris bon 208.960182 592.039818 -14.455455 8.587922 215232.592175 8.585734e+05 -463.931668 232.284013
18390204 0 1062361 0 337 woman and material 87.914583 249.085417 -9.376278 5.570405 212938.567912 8.494224e+05 -461.452671 231.042814
3568988 0 1043704 0 15 choisir réponse ici 3.913112 11.086888 -1.978159 1.175215 209198.977640 8.345050e+05 -457.382747 229.005061
18442374 0 1024798 0 175 youtube channel ucefx7o81rsf6-z6sufthoxa 45.652974 129.347026 -6.756698 4.014125 205409.478058 8.193885e+05 -453.221224 226.921445
3395864 0 1012130 0 171 channel ucefx7o81rsf6-z6sufthoxa instagram 44.609477 126.390523 -6.679033 3.967984 202870.316908 8.092597e+05 -450.411275 225.514544
17430581 0 1009648 0 162 ucefx7o81rsf6-z6sufthoxa instagram tweeter 42.261610 119.738390 -6.500893 3.862152 202372.827330 8.072752e+05 -449.858675 225.237865
7536897 0 1009227 0 626 from disneyland paris 163.307208 462.692792 -12.779171 7.592049 202288.442515 8.069386e+05 -449.764875 225.190900
12365916 0 1008497 0 625 paris bon plans' 163.046334 461.953666 -12.768960 7.585982 202142.122050 8.063549e+05 -449.602182 225.109442
2706826 0 1008497 0 625 bon plans' post 163.046334 461.953666 -12.768960 7.585982 202142.122050 8.063549e+05 -449.602182 225.109442
5564676 0 1006727 0 11 dougourouni tv instagram 2.869615 8.130385 -1.693994 1.006394 201787.345035 8.049397e+05 -449.207463 224.911812
10884068 0 999560 0 6 mieux ecouter coeur' 1.565245 4.434755 -1.251097 0.743271 200350.798780 7.992092e+05 -447.605629 224.109796
In [89]:
# Horizontal bar chart of graph_interactions_nonmedia3: one bar per trigram,
# bar length given by res_inter_nonmedia. `pays` is interpolated into the title.
fig = px.bar(
    graph_interactions_nonmedia3,
    x="res_inter_nonmedia",
    y="trigramme",
    orientation="h",
    labels={
        "res_inter_nonmedia": "Résiduel de Pearson (<b>en fonction des interactions</b>)",
        "trigramme": "Trigramme (dans les posts Facebook <b>non-médias</b>)",
    },
    title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=1000,
)
fig.update_traces(
    marker_color="lightgreen",
    opacity=0.75,
    textfont_size=12,
)
fig

in English

In [90]:
# Rebinds `pays` (set to the French label in the first cell) to the English
# place name interpolated into the chart titles of the cells that follow.
# NOTE(review): after this cell runs, re-executing an earlier French chart
# cell would pick up this English value in its title.
pays = "the French Community of Belgium"
In [91]:
# English chart: first 10 rows of graph_interactions_media1, one bar per "mot".
termeMaj, termeMin = "Lemma", "lemma"   # capitalised / lower-case term for label and title
media = "media"                         # group name shown in label and title
xx = "res_inter_media"                  # column plotted on the x axis
yy = "mot"                              # column plotted on the y axis
source = graph_interactions_media1[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [92]:
# English chart: first 10 rows of graph_interactions_nonmedia1, one bar per "mot".
termeMaj, termeMin = "Lemma", "lemma"   # capitalised / lower-case term for label and title
media = "non-media"                     # group name shown in label and title
xx = "res_inter_nonmedia"               # column plotted on the x axis
yy = "mot"                              # column plotted on the y axis
source = graph_interactions_nonmedia1[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [93]:
# English chart: first 10 rows of graph_interactions_media2, one bar per "bigramme".
termeMaj, termeMin = "Bigram", "bigram"  # capitalised / lower-case term for label and title
media = "media"                          # group name shown in label and title
xx = "res_inter_media"                   # column plotted on the x axis
yy = "bigramme"                          # column plotted on the y axis
source = graph_interactions_media2[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [94]:
# English chart: first 10 rows of graph_interactions_nonmedia2, one bar per "bigramme".
termeMaj, termeMin = "Bigram", "bigram"  # capitalised / lower-case term for label and title
media = "non-media"                      # group name shown in label and title
xx = "res_inter_nonmedia"                # column plotted on the x axis
yy = "bigramme"                          # column plotted on the y axis
source = graph_interactions_nonmedia2[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [95]:
# English chart: first 10 rows of graph_interactions_media3, one bar per "trigramme".
termeMaj, termeMin = "Trigram", "trigram"  # capitalised / lower-case term for label and title
media = "media"                            # group name shown in label and title
xx = "res_inter_media"                     # column plotted on the x axis
yy = "trigramme"                           # column plotted on the y axis
source = graph_interactions_media3[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [96]:
# English chart: first 10 rows of graph_interactions_nonmedia3, one bar per "trigramme".
termeMaj, termeMin = "Trigram", "trigram"  # capitalised / lower-case term for label and title
media = "non-media"                        # group name shown in label and title
xx = "res_inter_nonmedia"                  # column plotted on the x axis
yy = "trigramme"                           # column plotted on the y axis
source = graph_interactions_nonmedia3[:10]

# NOTE(review): "weighed" in the x-axis label probably should read "weighted";
# the string is left unchanged here.
fig = px.bar(
    source,
    x=xx,
    y=yy,
    orientation="h",
    labels={
        xx: "χ<sup>2</sup> residual (<b>weighed by interactions</b>)",
        yy: "{} (in <b>{}</b> Facebook posts)".format(termeMaj, media),
    },
    title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin, media, pays),
)
# Reverse the y axis so the rows are listed top-to-bottom in frame order.
fig.update_layout(
    yaxis={"autorange": "reversed"},
    width=1000,
    height=400,
)
fig.update_traces(
    marker_color="blue",
    opacity=0.75,
    textfont_size=12,
)
fig
In [ ]: