In [1]:
import pandas as pan
import numpy as np
import plotly.express as px
pays = "en France"

France

mots seuls

In [43]:
media1 = pan.read_csv("france-motsSeuls-media-nettoye.csv", low_memory=False, names=["mot","interactions"])
nonmedia1 = pan.read_csv("france-motsSeuls-nonmedia-nettoye.csv", low_memory=False, names=["mot","interactions"])
In [44]:
media1
Out[44]:
mot interactions
0 monde 496340
1 jungle 496340
2 miroir 496340
3 filmer 496340
4 réaction 496340
... ... ...
9129904 nord 455
9129905 littoral 455
9129906 disponible 455
9129907 version 455
9129908 digital 455

9129909 rows × 2 columns

In [45]:
nonmedia1
Out[45]:
mot interactions
0 thevoice 480966
1 histoire 480966
2 rencontre 480966
3 musical 480966
4 talent 480966
... ... ...
38038961 privatisation 411
38038962 route 411
38038963 national 411
38038964 cliquer 411
38038965 ici 411

38038966 rows × 2 columns

In [46]:
media1_table = pan.pivot_table(media1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
nonmedia1_table = pan.pivot_table(nonmedia1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
In [47]:
media1_table
Out[47]:
len sum
interactions interactions
mot
#12h45 1 607
#a25 1 585
#accidentdelaroute 1 574
#assesrfc 1 1073
#bonne 1 1583
... ... ...
시i 1 2465
시다 1 3330
어오에 1 399
주내6 1 2099
주지 1 2338

150951 rows × 2 columns

In [48]:
nonmedia1_table
Out[48]:
len sum
interactions interactions
mot
# 8 15980
#100kfollower 1 1300
#106cabrel 1 5896
#112day 1 425
#11mai 1 713
... ... ...
1 915
𖡡𖡡𖡡 3 3030
🛕histoir 1 534
🛖 2 2190
􏰒􏰄􏰃􏰁􏰉dever 1 904

626663 rows × 2 columns

In [49]:
media1_table["type"] = "media"
nonmedia1_table["type"] = "non-media"
In [50]:
media1_table
Out[50]:
len sum type
interactions interactions
mot
#12h45 1 607 media
#a25 1 585 media
#accidentdelaroute 1 574 media
#assesrfc 1 1073 media
#bonne 1 1583 media
... ... ... ...
시i 1 2465 media
시다 1 3330 media
어오에 1 399 media
주내6 1 2099 media
주지 1 2338 media

150951 rows × 3 columns

In [51]:
nonmedia1_table
Out[51]:
len sum type
interactions interactions
mot
# 8 15980 non-media
#100kfollower 1 1300 non-media
#106cabrel 1 5896 non-media
#112day 1 425 non-media
#11mai 1 713 non-media
... ... ... ...
1 915 non-media
𖡡𖡡𖡡 3 3030 non-media
🛕histoir 1 534 non-media
🛖 2 2190 non-media
􏰒􏰄􏰃􏰁􏰉dever 1 904 non-media

626663 rows × 3 columns

In [52]:
tableau1 = pan.concat([media1_table,nonmedia1_table], names = ["mot","nb","interactions","type"], levels=0)
In [53]:
tableau1
Out[53]:
len sum type
interactions interactions
mot
#12h45 1 607 media
#a25 1 585 media
#accidentdelaroute 1 574 media
#assesrfc 1 1073 media
#bonne 1 1583 media
... ... ... ...
1 915 non-media
𖡡𖡡𖡡 3 3030 non-media
🛕histoir 1 534 non-media
🛖 2 2190 non-media
􏰒􏰄􏰃􏰁􏰉dever 1 904 non-media

777614 rows × 3 columns

In [54]:
tableau1["mot"] = tableau1.index
tableau1.columns = ["nb","interactions","media","mot"]
tableau1.reset_index(drop=True, inplace=True)
tableau1
Out[54]:
nb interactions media mot
0 1 607 media #12h45
1 1 585 media #a25
2 1 574 media #accidentdelaroute
3 1 1073 media #assesrfc
4 1 1583 media #bonne
... ... ... ... ...
777609 1 915 non-media
777610 3 3030 non-media 𖡡𖡡𖡡
777611 1 534 non-media 🛕histoir
777612 2 2190 non-media 🛖
777613 1 904 non-media 􏰒􏰄􏰃􏰁􏰉dever

777614 rows × 4 columns

In [55]:
khi2_1 = pan.pivot_table(tableau1,index=["mot"],columns=["media"],values=["mot","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [56]:
khi2_1
Out[56]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
mot
# 0 1 0 1 0 15980 0 8
#100kfollower 0 1 0 1 0 1300 0 1
#106cabrel 0 1 0 1 0 5896 0 1
#112day 0 1 0 1 0 425 0 1
#11mai 0 1 0 1 0 713 0 1
... ... ... ... ... ... ... ... ...
0 1 0 1 0 915 0 1
𖡡𖡡𖡡 0 1 0 1 0 3030 0 3
🛕histoir 0 1 0 1 0 534 0 1
🛖 0 1 0 1 0 2190 0 2
􏰒􏰄􏰃􏰁􏰉dever 0 1 0 1 0 904 0 1

669100 rows × 8 columns

In [57]:
khi2_1["mot"] = khi2_1.index
khi2_1.columns = khi2_1.columns.get_level_values(0)
khi2_1.reset_index(drop=True, inplace=True)
khi2_1.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","mot"]
khi2_1 = khi2_1.drop(columns=["a","b","c","d"])
khi2_1
Out[57]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot
0 0 15980 0 8 #
1 0 1300 0 1 #100kfollower
2 0 5896 0 1 #106cabrel
3 0 425 0 1 #112day
4 0 713 0 1 #11mai
... ... ... ... ... ...
669095 0 915 0 1
669096 0 3030 0 3 𖡡𖡡𖡡
669097 0 534 0 1 🛕histoir
669098 0 2190 0 2 🛖
669099 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever

669100 rows × 5 columns

In [58]:
khi2_1.obs_media.sum(), khi2_1.obs_nonmedia.sum()
Out[58]:
(9129875, 38038252)
In [59]:
print("Nb de lignes fichier media1 = ", media1.shape[0])
print("Somme observée média khi2_1 = ", khi2_1.obs_media.sum())

print("Nb de lignes  fichier nonmedia1 = ", nonmedia1.shape[0])
print("Somme observée nonmédia khi2_1 = ", khi2_1.obs_nonmedia.sum())
Nb de lignes fichier media1 =  9129909
Somme observée média khi2_1 =  9129875
Nb de lignes  fichier nonmedia1 =  38038966
Somme observée nonmédia khi2_1 =  38038252
In [60]:
khi2_1["exp_media"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_media.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1["exp_nonmedia"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_nonmedia.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1
Out[60]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia
0 0 15980 0 8 # 1.548482 6.451518
1 0 1300 0 1 #100kfollower 0.193560 0.806440
2 0 5896 0 1 #106cabrel 0.193560 0.806440
3 0 425 0 1 #112day 0.193560 0.806440
4 0 713 0 1 #11mai 0.193560 0.806440
... ... ... ... ... ... ... ...
669095 0 915 0 1 0.193560 0.806440
669096 0 3030 0 3 𖡡𖡡𖡡 0.580681 2.419319
669097 0 534 0 1 🛕histoir 0.193560 0.806440
669098 0 2190 0 2 🛖 0.387121 1.612879
669099 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever 0.193560 0.806440

669100 rows × 7 columns

In [61]:
khi2_1["res_media"] = (khi2_1.obs_media - khi2_1.exp_media) / np.sqrt(khi2_1.exp_media)
khi2_1["res_nonmedia"] = (khi2_1.obs_nonmedia - khi2_1.exp_nonmedia) / np.sqrt(khi2_1.exp_nonmedia)
khi2_1
Out[61]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
0 0 15980 0 8 # 1.548482 6.451518 -1.244380 0.609642
1 0 1300 0 1 #100kfollower 0.193560 0.806440 -0.439955 0.215541
2 0 5896 0 1 #106cabrel 0.193560 0.806440 -0.439955 0.215541
3 0 425 0 1 #112day 0.193560 0.806440 -0.439955 0.215541
4 0 713 0 1 #11mai 0.193560 0.806440 -0.439955 0.215541
... ... ... ... ... ... ... ... ... ...
669095 0 915 0 1 0.193560 0.806440 -0.439955 0.215541
669096 0 3030 0 3 𖡡𖡡𖡡 0.580681 2.419319 -0.762024 0.373328
669097 0 534 0 1 🛕histoir 0.193560 0.806440 -0.439955 0.215541
669098 0 2190 0 2 🛖 0.387121 1.612879 -0.622190 0.304821
669099 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever 0.193560 0.806440 -0.439955 0.215541

669100 rows × 9 columns

In [62]:
graph_media1 = khi2_1.sort_values(by=["res_media"],ascending=False).head(50)
graph_media1
Out[62]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
170320 134411966 113940983 64979 46074 coronavirus 21495.447728 89557.552272 296.586765 -145.302775
173404 66219724 65052664 37761 30982 covid-19 13305.913061 55437.086939 212.005374 -103.864949
411731 59101581 87692438 25443 35698 mort 11834.467953 49306.532047 125.094124 -61.285686
139324 35496389 56603787 21178 26769 cas 9280.633862 38666.366138 123.498592 -60.504008
649483 25677865 28338142 13588 12057 épidémie 4963.852908 20681.147092 122.407127 -59.969281
599994 16879287 9764535 8950 5648 trump 2825.592698 11772.407302 115.215069 -56.445773
71297 154925171 407435568 62653 142955 an 39797.538261 165810.461739 114.567621 -56.128577
165247 60637750 126961151 26729 45221 confinement 13926.660820 58023.339180 108.484035 -53.148128
389459 46623118 97234695 23708 37861 masque 11917.311745 49651.688255 108.006528 -52.914189
386385 25298769 43458409 15743 20191 mardi 6955.394439 28978.605561 105.368227 -51.621642
302893 29070465 47101861 14863 18664 hôpital 6489.494890 27037.505110 103.944578 -50.924173
326979 23665481 38229219 15266 20386 jeudi 6900.810446 28751.189554 100.699134 -49.334176
374361 28186777 55886818 18053 27550 lundi 8826.928608 36776.071392 98.200117 -48.109866
535182 30644043 54117407 18006 27924 samedi 8890.222814 37039.777186 96.680164 -47.365216
203887 11064228 5494116 5625 3256 donald 1719.008683 7161.991317 94.208966 -46.154535
397049 22565174 37724985 14249 20022 mercredi 6633.503724 27637.496276 93.503253 -45.808794
403481 32336971 53134687 16482 26708 ministre 8359.867697 34830.132303 88.832188 -43.520362
380449 16370405 21488846 9930 11700 maire 4186.708458 17443.291542 88.761473 -43.485718
615546 26982512 53912967 16323 27161 vendredi 8416.774414 35067.225586 86.177990 -42.220026
212053 17485984 26300778 9262 10908 décéder 3904.110476 16265.889524 85.749719 -42.010209
212040 13275568 17881195 7912 8221 décès 3122.707700 13010.292300 85.704905 -41.988254
74324 39734735 82044180 20229 37605 annoncer 11194.364168 46639.635832 85.390771 -41.834355
71155 22037860 25004500 10138 12853 américain 4450.143974 18540.856026 85.263187 -41.771849
211559 11822094 15744902 6967 6532 déconfinement 2612.869971 10886.130029 85.180953 -41.731562
52890 10577017 14236643 7232 7329 accident 2818.430969 11742.569031 83.135444 -40.729433
220755 7483942 7081104 4450 3034 ehpad 1448.604998 6035.395002 78.858437 -38.634056
555292 21790862 33420712 9058 11903 soignant 4057.216643 16903.783357 78.509813 -38.463259
461499 13187714 18604710 7419 8471 patient 3075.672556 12814.327444 78.316365 -38.368486
522156 7821853 5903198 4698 3520 rouvrir 1590.678230 6627.321770 77.910333 -38.169564
228002 16187242 23945990 8388 10682 enquête 3691.194188 15378.805812 77.307060 -37.874011
309878 11538098 15149499 6399 6721 incendie 2539.510632 10580.489368 76.586960 -37.521222
476618 36341279 66478813 15875 29326 policier 8749.117383 36451.882617 76.182732 -37.323184
456179 14437747 20015695 7593 9426 pandémie 3294.202092 13724.797908 74.898291 -36.693915
489592 36439701 71468005 18072 35840 président 10435.220822 43476.779178 74.758299 -36.625330
167565 6197469 5273608 3784 2411 contamination 1199.105820 4995.894180 74.647284 -36.570943
593665 8455565 11778809 5613 5555 toulouse 2161.681001 9006.318999 74.231682 -36.367332
224324 22973633 36598584 10414 16061 emmanuel 5124.507925 21350.492075 73.890378 -36.200121
650486 12655143 10202662 5002 4524 états-unis 1843.855052 7682.144948 73.547628 -36.032203
286462 14360793 18976522 7244 8932 habitant 3131.030791 13044.969209 73.504138 -36.010896
111589 8547576 4342249 3443 2044 biden 1062.065155 4424.934845 73.058698 -35.792668
388725 3379797 2290849 2762 1194 martinique 765.724395 3190.275605 72.141349 -35.343243
198114 26928608 68819354 16344 32489 dimanche 9452.128253 39380.871747 70.887945 -34.729180
601465 21616207 40619060 9827 15604 tuer 4922.431012 20508.568988 69.905449 -34.247839
231605 17059976 6202962 3677 2699 equipe 1234.140228 5141.859772 69.537053 -34.067356
377998 33120163 71582308 15016 29369 macron 8591.172210 35793.827790 69.316285 -33.959198
237591 27671393 57091250 12440 22444 euro 6752.156165 28131.843835 69.219176 -33.911622
64907 18301588 19258261 6812 8763 alerte 3014.701074 12560.298926 69.159630 -33.882449
525186 7119677 9380947 4142 3508 réanimation 1480.736001 6169.263999 69.159069 -33.882175
413630 30243949 53737542 10782 18234 mourir 5616.344550 23399.655450 68.928458 -33.769195
143350 15209044 35944268 11075 19116 centre 5843.777857 24347.222143 68.431540 -33.525746
In [63]:
fig = px.bar(graph_media1, x="res_media", y="mot", labels={"res_media":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="red", opacity=0.75, textfont_size=12)
fig
In [64]:
graph_nonmedia1 = khi2_1.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia1
Out[64]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
468770 50822084 412400041 16614 189225 photo 39842.250682 165996.749318 -116.370948 57.012057
479711 5163041 152601477 2773 81740 post 16358.358386 68154.641614 -106.218763 52.038334
262420 3521794 126840960 1830 71233 from 14142.093391 58920.906609 -103.532107 50.722096
118672 21716648 317812687 10223 122961 bon 25779.129877 107404.870123 -96.887429 47.466758
651511 16218664 224940590 7184 88998 être 18617.013079 77564.986921 -83.792606 41.051387
111733 28687304 292374266 12258 117324 bien 25081.925815 104500.074185 -80.973061 39.670045
197139 1293981 82699272 588 37652 dieu 7401.744403 30838.255597 -79.198836 38.800823
70432 5088216 120833718 1774 46080 amour 9262.632757 38591.367243 -77.809981 38.120400
61688 4204400 107466272 1524 39905 aimer 8019.008077 33409.991923 -72.530283 35.533788
539058 16205966 178545037 6220 71847 savoir 15110.668940 62956.331060 -72.325691 35.433556
61643 3388628 92312347 891 33937 aime 6741.316790 28086.683210 -71.253633 34.908337
243991 57437770 465150322 25521 178827 faire 39553.652332 164794.347668 -70.558034 34.567551
505369 3006970 89480867 1669 37598 recette 7600.530791 31666.469209 -68.036949 33.332429
466727 33991214 280967141 13053 105015 petit 22853.272963 95214.727037 -64.828201 31.760411
454567 3483104 81907075 1720 35321 page 7169.665649 29871.334351 -64.360648 31.531349
152560 8316658 113228320 2983 43665 chose 9029.199082 37618.800918 -63.629400 31.173098
94149 10283849 137789973 4903 55125 avoir 11619.035382 48408.964618 -62.305723 30.524607
618713 41644987 288345631 13909 106988 vie 23400.854943 97496.145057 -62.049063 30.398865
69682 7111672 127793309 2638 39959 ami 8245.086462 34351.913538 -61.750382 30.252536
106541 13121732 153968593 5387 55935 bel 11869.502360 49452.497640 -59.501296 29.150672
150966 12788331 133967595 4858 52668 chien 11134.747607 46391.252393 -59.483221 29.141816
148309 5924194 91078518 2370 36379 chat 7500.266576 31248.733424 -59.238163 29.021758
199049 6531362 98808402 2643 36897 dire 7653.372743 31886.627257 -57.272165 28.058584
623311 25635919 223791438 10045 80996 voir 17621.919774 73419.080226 -57.077649 27.963287
585564 6222645 85755143 2429 35297 the 7302.254428 30423.745572 -57.028324 27.939121
516221 11345282 130085847 4426 48056 rien 10158.429648 42323.570352 -56.875528 27.864265
303947 11991597 131524167 5541 54439 ici 11609.744489 48370.255511 -56.323226 27.593682
31051 2374109 46025291 322 18458 3 3635.061712 15144.938288 -54.950751 26.921284
582984 28862130 179815816 9133 73376 temps 15970.463622 66538.536378 -54.104859 26.506867
109734 5875842 78807166 2608 34383 besoin 7159.987636 29831.012364 -53.795375 26.355246
322670 16546146 162172562 6094 54618 jamais 11751.430601 48960.569399 -52.188386 25.567956
382853 4432048 75752145 1522 25130 maman 5158.768091 21493.231909 -50.634043 24.806457
442553 3020122 44342511 1121 21080 of 4297.231367 17903.768633 -48.452675 23.737769
394968 16798670 171587069 5077 46052 meilleur 9896.542614 41232.457386 -48.446686 23.734835
459339 6385185 86184931 2982 33267 partager 7016.365922 29232.634078 -48.163626 23.596159
190212 4894738 67106361 2352 28542 demain 5979.850721 24914.149279 -46.914192 22.984040
162467 1274553 42139409 684 16958 commentaire 3414.790135 14227.209865 -46.731134 22.894357
451544 4336550 56737049 1413 21867 oui 4506.082889 18773.917111 -46.077825 22.574291
333380 188084 17281955 122 12006 jésus 2347.498852 9780.501148 -45.932986 22.503332
407896 12350090 122310716 5139 44621 moment 9631.558616 40128.441384 -45.776804 22.426815
119046 2182185 48797181 751 16755 bonheur 3388.465939 14117.534061 -45.309086 22.197672
294210 2701250 49681437 992 18494 heureux 3771.715257 15714.284743 -45.261700 22.174457
367839 3274052 46987176 1055 18548 live 3794.361808 15808.638192 -44.471321 21.787237
146455 4075650 63156273 1527 21767 chance 4508.792733 18785.207267 -44.406579 21.755519
49520 1910405 39960654 682 15642 a 3159.677710 13164.322290 -44.078156 21.594620
164563 2235438 44775159 1196 19209 concours 3949.597137 16455.402863 -43.815120 21.465754
588938 32501212 138124530 8169 60033 timeline 13201.196960 55000.803040 -43.797667 21.457203
330259 14286954 119389578 6577 51212 journée 11185.653956 46603.346044 -43.575603 21.348411
464483 1747189 37005565 500 13217 pensée 2655.066108 11061.933892 -41.823746 20.490147
624504 9510380 78351944 3771 34145 vous 7339.030878 30576.969122 -41.649448 20.404755
In [65]:
fig = px.bar(graph_nonmedia1, x="res_nonmedia", y="mot", labels={"res_nonmedia":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="darkorange", opacity=0.75, textfont_size=12)
fig
In [66]:
khi2_1["exp_inter_media"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_media.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1["exp_inter_nonmedia"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_nonmedia.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1
Out[66]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 15980 0 8 # 1.548482 6.451518 -1.244380 0.609642 2794.558910 13185.441090
1 0 1300 0 1 #100kfollower 0.193560 0.806440 -0.439955 0.215541 227.342089 1072.657911
2 0 5896 0 1 #106cabrel 0.193560 0.806440 -0.439955 0.215541 1031.083813 4864.916187
3 0 425 0 1 #112day 0.193560 0.806440 -0.439955 0.215541 74.323375 350.676625
4 0 713 0 1 #11mai 0.193560 0.806440 -0.439955 0.215541 124.688392 588.311608
... ... ... ... ... ... ... ... ... ... ... ...
669095 0 915 0 1 0.193560 0.806440 -0.439955 0.215541 160.013855 754.986145
669096 0 3030 0 3 𖡡𖡡𖡡 0.580681 2.419319 -0.762024 0.373328 529.881946 2500.118054
669097 0 534 0 1 🛕histoir 0.193560 0.806440 -0.439955 0.215541 93.385135 440.614865
669098 0 2190 0 2 🛖 0.387121 1.612879 -0.622190 0.304821 382.983981 1807.016019
669099 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever 0.193560 0.806440 -0.439955 0.215541 158.090191 745.909809

669100 rows × 11 columns

In [67]:
khi2_1["res_inter_media"] = (khi2_1.interactions_media - khi2_1.exp_inter_media) / np.sqrt(khi2_1.exp_inter_media)
khi2_1["res_inter_nonmedia"] = (khi2_1.interactions_nonmedia - khi2_1.exp_inter_nonmedia) / np.sqrt(khi2_1.exp_inter_nonmedia)
khi2_1
/home/jhroy/.local/lib/python3.7/site-packages/pandas/core/arraylike.py:358: RuntimeWarning:

invalid value encountered in sqrt

Out[67]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 15980 0 8 # 1.548482 6.451518 -1.244380 0.609642 2794.558910 13185.441090 -52.863588 24.336938
1 0 1300 0 1 #100kfollower 0.193560 0.806440 -0.439955 0.215541 227.342089 1072.657911 -15.077868 6.941434
2 0 5896 0 1 #106cabrel 0.193560 0.806440 -0.439955 0.215541 1031.083813 4864.916187 -32.110494 14.782786
3 0 425 0 1 #112day 0.193560 0.806440 -0.439955 0.215541 74.323375 350.676625 -8.621101 3.968917
4 0 713 0 1 #11mai 0.193560 0.806440 -0.439955 0.215541 124.688392 588.311608 -11.166396 5.140701
... ... ... ... ... ... ... ... ... ... ... ... ... ...
669095 0 915 0 1 0.193560 0.806440 -0.439955 0.215541 160.013855 754.986145 -12.649658 5.823554
669096 0 3030 0 3 𖡡𖡡𖡡 0.580681 2.419319 -0.762024 0.373328 529.881946 2500.118054 -23.019165 10.597389
669097 0 534 0 1 🛕histoir 0.193560 0.806440 -0.439955 0.215541 93.385135 440.614865 -9.663598 4.448854
669098 0 2190 0 2 🛖 0.387121 1.612879 -0.622190 0.304821 382.983981 1807.016019 -19.569977 9.009478
669099 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever 0.193560 0.806440 -0.439955 0.215541 158.090191 745.909809 -12.573392 5.788443

669100 rows × 13 columns

In [68]:
graph_interactions_media1 = khi2_1.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media1
Out[68]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
170320 134411966 113940983 64979 46074 coronavirus 21495.447728 89557.552272 296.586765 -145.302775 4.343160e+07 4.136390e+07 13805.265977 11284.668023
173404 66219724 65052664 37761 30982 covid-19 13305.913061 55437.086939 212.005374 -103.864949 2.295672e+07 -5.524179e+07 9029.458561 NaN
411731 59101581 87692438 25443 35698 mort 11834.467953 49306.532047 125.094124 -61.285686 2.567112e+07 -4.243456e+07 6598.115423 NaN
231605 17059976 6202962 3677 2699 equipe 1234.140228 5141.859772 69.537053 -34.067356 4.068188e+06 1.919475e+07 6441.223712 -2965.361783
499091 21323721 16676525 3486 6033 raconte 1842.500130 7676.499870 38.288285 -18.758066 6.645427e+06 3.135482e+07 5693.956255 -2621.340452
599994 16879287 9764535 8950 5648 trump 2825.592698 11772.407302 115.215069 -56.445773 4.659432e+06 2.198439e+07 5661.083082 -2606.206549
560871 8374510 281511 620 62 speech 132.008098 549.991902 42.472940 -20.808198 1.513752e+06 7.142269e+06 5576.281486 -2567.166233
649483 25677865 28338142 13588 12057 épidémie 4963.852908 20681.147092 122.407127 -59.969281 9.446240e+06 4.456977e+07 5281.198821 -2431.318311
165247 60637750 126961151 26729 45221 confinement 13926.660820 58023.339180 108.484035 -53.148128 3.280702e+07 -8.765571e+06 4858.936297 NaN
139324 35496389 56603787 21178 26769 cas 9280.633862 38666.366138 123.498592 -60.504008 1.610634e+07 7.599383e+07 4831.481861 -2224.281023
71155 22037860 25004500 10138 12853 américain 4450.143974 18540.856026 85.263187 -41.771849 8.226699e+06 3.881566e+07 4815.234009 -2216.800960
203887 11064228 5494116 5625 3256 donald 1719.008683 7161.991317 94.208966 -46.154535 2.895699e+06 1.366265e+07 4800.286574 -2209.919573
64907 18301588 19258261 6812 8763 alerte 3014.701074 12560.298926 69.159630 -33.882449 6.568411e+06 3.099144e+07 4578.101843 -2107.631850
127748 9783650 4707677 954 1678 brut 509.450608 2122.549392 19.695589 -9.649196 2.534222e+06 1.195711e+07 4553.878274 -2096.479987
403481 32336971 53134687 16482 26708 ministre 8359.867697 34830.132303 88.832188 -43.520362 1.494716e+07 7.052450e+07 4497.960142 -2070.736820
650486 12655143 10202662 5002 4524 états-unis 1843.855052 7682.144948 73.547628 -36.032203 3.997339e+06 1.886047e+07 4330.342267 -1993.570173
476618 36341279 66478813 15875 29326 policier 8749.117383 36451.882617 76.182732 -37.323184 1.798103e+07 -7.871839e+07 4329.835604 NaN
302893 29070465 47101861 14863 18664 hôpital 6489.494890 27037.505110 103.944578 -50.924173 1.332090e+07 6.285142e+07 4315.206527 -1986.602095
389459 46623118 97234695 23708 37861 masque 11917.311745 49651.688255 108.006528 -52.914189 2.515764e+07 -4.485728e+07 4279.623187 NaN
111589 8547576 4342249 3443 2044 biden 1062.065155 4424.934845 73.058698 -35.792668 2.254154e+06 1.063567e+07 4191.747564 -1929.764993
535182 30644043 54117407 18006 27924 samedi 8890.222814 37039.777186 96.680164 -47.365216 1.482296e+07 6.993849e+07 4109.309328 -1891.812703
413630 30243949 53737542 10782 18234 mourir 5616.344550 23399.655450 68.928458 -33.769195 1.468656e+07 6.929493e+07 4059.538663 -1868.899661
489592 36439701 71468005 18072 35840 président 10435.220822 43476.779178 74.758299 -36.625330 1.887074e+07 -7.452049e+07 4044.376487 NaN
74324 39734735 82044180 20229 37605 annoncer 11194.364168 46639.635832 85.390771 -41.834355 2.129652e+07 -6.307505e+07 3995.440182 NaN
561710 24230287 38603802 6538 16979 sport 4551.956671 18965.043329 29.436736 -14.421545 1.098833e+07 5.184576e+07 3994.718331 -1839.058167
168074 68332068 174690918 29925 75114 contre 20331.376315 84707.623685 67.282061 -32.962598 4.249950e+07 3.696603e+07 3962.557843 22652.232397
555292 21790862 33420712 9058 11903 soignant 4057.216643 16903.783357 78.509813 -38.463259 9.655319e+06 4.555626e+07 3905.493377 -1797.981459
326979 23665481 38229219 15266 20386 jeudi 6900.810446 28751.189554 100.699134 -49.334176 1.082405e+07 5.107065e+07 3903.177520 -1796.915302
224324 22973633 36598584 10414 16061 emmanuel 5124.507925 21350.492075 73.890378 -36.200121 1.041790e+07 4.915432e+07 3890.020438 -1790.858144
394257 5911243 1979457 1698 1016 mediapart 525.322550 2188.677450 51.164125 -25.066153 1.379914e+06 6.510786e+06 3857.443563 -1775.860649
386385 25298769 43458409 15743 20191 mardi 6955.394439 28978.605561 105.368227 -51.621642 1.202415e+07 5.673302e+07 3828.200346 -1762.397878
380449 16370405 21488846 9930 11700 maire 4186.708458 17443.291542 88.761473 -43.485718 6.620770e+06 3.123848e+07 3789.082785 -1744.389232
397049 22565174 37724985 14249 20022 mercredi 6633.503724 27637.496276 93.503253 -45.808794 1.054345e+07 4.974670e+07 3702.330059 -1704.450669
199058 25626141 47026915 8139 20281 direct 5500.982634 22919.017366 35.567842 -17.425276 1.270546e+07 5.994760e+07 3624.851555 -1668.781702
212053 17485984 26300778 9262 10908 décéder 3904.110476 16265.889524 85.749719 -42.010209 7.657365e+06 3.612940e+07 3551.835607 -1635.167173
286462 14360793 18976522 7244 8932 habitant 3131.030791 13044.969209 73.504138 -36.010896 5.829981e+06 2.750733e+07 3533.107348 -1626.545199
374361 28186777 55886818 18053 27550 lundi 8826.928608 36776.071392 98.200117 -48.109866 1.470267e+07 6.937093e+07 3516.610268 -1618.950399
26428 21047298 35948591 9627 16896 24 5133.798818 21389.201182 62.709948 -30.722644 9.967357e+06 4.702853e+07 3509.517590 -1615.685125
522156 7821853 5903198 4698 3520 rouvrir 1590.678230 6627.321770 77.910333 -38.169564 2.400217e+06 1.132483e+07 3499.493139 -1611.070144
377998 33120163 71582308 15016 29369 macron 8591.172210 35793.827790 69.316285 -33.959198 1.831021e+07 -7.716520e+07 3461.041795 NaN
228002 16187242 23945990 8388 10682 enquête 3691.194188 15378.805812 77.307060 -37.874011 7.018441e+06 3.311479e+07 3460.925495 -1593.314664
456179 14437747 20015695 7593 9426 pandémie 3294.202092 13724.797908 74.898291 -36.693915 6.025167e+06 2.842827e+07 3427.240941 -1577.807225
260636 59243594 158841646 25494 65247 français 17563.851695 73177.148305 59.837223 -29.315248 3.813843e+07 1.638936e+07 3417.491363 35187.498627
615546 26982512 53912967 16323 27161 vendredi 8416.774414 35067.225586 86.177990 -42.220026 1.414688e+07 6.674860e+07 3412.611121 -1571.072059
128833 4174629 1073397 142 242 buisine 74.327140 309.672860 7.849479 -3.845590 9.177671e+05 4.330259e+06 3399.641415 -1565.101164
328483 7796401 6288871 2860 2396 joe 1017.352735 4238.647265 57.770540 -28.302746 2.463212e+06 1.162206e+07 3398.099850 -1564.391470
212040 13275568 17881195 7912 8221 décès 3122.707700 13010.292300 85.704905 -41.988254 5.448649e+06 2.570811e+07 3353.099429 -1543.674517
237591 27671393 57091250 12440 22444 euro 6752.156165 28131.843835 69.219176 -33.911622 1.482317e+07 6.993948e+07 3337.126695 -1536.321111
601465 21616207 40619060 9827 15604 tuer 4922.431012 20508.568988 69.905449 -34.247839 1.088361e+07 5.135165e+07 3253.255819 -1497.709272
213825 15723271 24809216 6941 10556 dénoncer 3386.723897 14110.276103 61.074689 -29.921503 7.088262e+06 3.344423e+07 3243.343405 -1493.145870
In [69]:
fig = px.bar(graph_interactions_media1, x="res_inter_media", y="mot", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="gold", opacity=0.75, textfont_size=12)
fig
In [70]:
graph_interactions_nonmedia1 = khi2_1.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia1
Out[70]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
329861 60241606 337839261 27797 133719 jour 31263.079208 130252.920792 -19.603003 9.603836 6.961580e+07 1.350166e+06 -1123.517632 289585.871829
74470 31170838 168355278 14971 66000 année 15672.767940 65298.232060 -5.605582 2.746267 3.489283e+07 1.075830e+06 -630.097514 161276.364104
259525 102166581 323178741 46668 131785 france 34541.409358 143911.590642 65.248223 -31.966187 7.438376e+07 2.384665e+07 3221.344636 61297.047230
248688 39342341 167960078 16943 61631 femme 15208.803993 63365.196007 14.062115 -6.889263 3.625274e+07 7.492225e+06 513.135002 58624.971473
582984 28862130 179815816 9133 73376 temps 15970.463622 66538.536378 -54.104859 26.506867 3.649329e+07 8.627202e+06 -1263.235073 58282.732288
460711 30441742 179999003 13826 68434 passer 15922.267117 66337.732883 -16.612851 8.138911 3.680157e+07 1.008172e+07 -1048.363958 53514.333519
618605 35565389 174332304 13895 57246 vidéo 13770.070569 57370.929431 1.064625 -0.521577 3.670660e+07 9.633641e+06 -188.362252 53063.371875
23051 38268374 171868034 20641 82138 2020 19893.930124 82885.069876 5.296646 -2.594915 3.674835e+07 9.830610e+06 250.745212 51680.308670
468770 50822084 412400041 16614 189225 photo 39842.250682 165996.749318 -116.370948 57.012057 8.100760e+07 5.509962e+07 -3353.789277 48134.807856
260636 59243594 158841646 25494 65247 français 17563.851695 73177.148305 59.837223 -29.315248 3.813843e+07 1.638936e+07 3417.491363 35187.498627
297234 45430009 177178173 24125 64872 homme 17226.282599 71770.717401 52.562083 -25.751036 3.892939e+07 2.012134e+07 1041.875425 35012.926133
398720 40210594 185853705 20086 73972 mettre 18205.891083 75852.108917 13.934040 -6.826517 3.953379e+07 2.297305e+07 107.640870 33982.875205
651511 16218664 224940590 7184 88998 être 18617.013079 77564.986921 -83.792606 41.051387 4.217358e+07 3.542823e+07 -3996.679706 31839.251107
481659 30919778 206466630 15014 84592 pouvoir 19279.763414 80326.236586 -30.721747 15.051094 4.151379e+07 3.231517e+07 -1644.235908 30635.424147
226585 41367591 193549974 17102 72441 enfant 17331.966502 72211.033498 -1.746790 0.855781 4.108204e+07 3.027807e+07 44.551266 29672.033981
623311 25635919 223791438 10045 80996 voir 17621.919774 73419.080226 -57.077649 27.963287 4.361949e+07 4.225042e+07 -2722.924712 27929.250740
615782 34573581 211187073 15828 79006 venir 18356.093846 76477.906154 -18.659631 9.141663 4.297826e+07 3.922494e+07 -1282.025920 27456.911148
600170 33423050 224678457 14885 92172 très 20721.980923 86335.019077 -40.548299 19.865284 4.513641e+07 4.940764e+07 -1743.484351 24935.181637
624466 35230744 232663840 14551 89623 vouloir 20163.946689 84010.053311 -39.527846 19.365347 4.684901e+07 5.748812e+07 -1697.427137 23103.876358
168074 68332068 174690918 29925 75114 contre 20331.376315 84707.623685 67.282061 -32.962598 4.249950e+07 3.696603e+07 3962.557843 22652.232397
483203 39995813 246376515 19537 94572 prendre 22086.967888 92022.032112 -17.158000 8.405989 5.008037e+07 7.273450e+07 -1425.027118 20360.318362
408371 45302524 245358136 16554 85493 monde 19752.244013 82294.755987 -22.756390 11.148733 5.083031e+07 7.627290e+07 -775.335672 19360.678060
170320 134411966 113940983 64979 46074 coronavirus 21495.447728 89557.552272 296.586765 -145.302775 4.343160e+07 4.136390e+07 13805.265977 11284.668023
197139 1293981 82699272 588 37652 dieu 7401.744403 30838.255597 -79.198836 38.800823 1.468862e+07 6.930464e+07 -3494.945888 1608.976715
505369 3006970 89480867 1669 37598 recette 7600.530791 31666.469209 -68.036949 33.332429 1.617414e+07 7.631370e+07 -3274.023443 1507.270113
61643 3388628 92312347 891 33937 aime 6741.316790 28086.683210 -71.253633 34.908337 1.673605e+07 7.896493e+07 -3262.652556 1502.035271
454567 3483104 81907075 1720 35321 page 7169.665649 29871.334351 -64.360648 31.531349 1.493291e+07 7.045727e+07 -2962.960629 1364.065371
517261 2374229 65169508 582 12902 rire 2609.966567 10874.033433 -39.695687 19.447575 1.181195e+07 5.573179e+07 -2746.036586 1264.199523
148309 5924194 91078518 2370 36379 chat 7500.266576 31248.733424 -59.238163 29.021758 1.696369e+07 8.003902e+07 -2680.335309 1233.952467
382853 4432048 75752145 1522 25130 maman 5158.768091 21493.231909 -50.634043 24.806457 1.402249e+07 6.616170e+07 -2561.097824 1179.058817
585564 6222645 85755143 2429 35297 the 7302.254428 30423.745572 -57.028324 27.939121 1.608494e+07 7.589285e+07 -2459.055198 1132.081206
459339 6385185 86184931 2982 33267 partager 7016.365922 29232.634078 -48.163626 23.596159 1.618853e+07 7.638159e+07 -2436.522634 1121.707835
109734 5875842 78807166 2608 34383 besoin 7159.987636 29831.012364 -53.795375 26.355246 1.480924e+07 6.987377e+07 -2321.401688 1068.709326
614773 0 30204468 0 5286 vdm 1023.159543 4262.840457 -31.986865 15.670896 5.282113e+06 2.492236e+07 -2298.284788 1058.066942
162467 1274553 42139409 684 16958 commentaire 3414.790135 14227.209865 -46.731134 22.894357 7.592170e+06 3.582179e+07 -2292.821925 1055.551991
512404 2808977 54495036 1529 11602 respect 2541.639795 10589.360205 -20.086210 9.840567 1.002124e+07 4.728277e+07 -2278.299861 1048.866433
119046 2182185 48797181 751 16755 bonheur 3388.465939 14117.534061 -45.309086 22.197672 8.915197e+06 4.206417e+07 -2254.986259 1038.133494
146455 4075650 63156273 1527 21767 chance 4508.792733 18785.207267 -44.406579 21.755519 1.175742e+07 5.547450e+07 -2240.295356 1031.370207
520161 6850178 82329574 1108 11200 ronaldo 2382.339699 9925.660301 -26.108587 12.791030 1.559562e+07 7.358413e+07 -2214.524974 1019.506234
200102 3732266 59682079 1605 20398 disponible 4258.906435 17744.093565 -40.666478 19.923182 1.108981e+07 5.232454e+07 -2209.381505 1017.138322
314164 5052289 68759111 1626 20561 instagram 4294.521524 17892.478476 -40.720520 19.949658 1.290803e+07 6.090337e+07 -2186.538558 1006.622059
190212 4894738 67106361 2352 28542 demain 5979.850721 24914.149279 -46.914192 22.984040 1.259145e+07 5.940965e+07 -2169.038300 998.565423
294210 2701250 49681437 992 18494 heureux 3771.715257 15714.284743 -45.261700 22.174457 9.160607e+06 4.322208e+07 -2134.160981 982.508867
456736 2226690 45730475 705 12276 papa 2512.605755 10468.394245 -36.061313 17.667035 8.386679e+06 3.957049e+07 -2127.085293 979.251416
265922 6240794 74619727 2693 22519 gagner 4880.041315 20331.958685 -31.307273 15.337952 1.414077e+07 6.671975e+07 -2100.821750 967.160405
257013 260451 27831078 182 9562 for 1886.051189 7857.948811 -39.237913 19.223304 4.912605e+06 2.317892e+07 -2098.931061 966.289983
31051 2374109 46025291 322 18458 3 3635.061712 15144.938288 -54.950751 26.921284 8.464016e+06 3.993538e+07 -2093.256360 963.677507
164563 2235438 44775159 1196 19209 concours 3949.597137 16455.402863 -43.815120 21.465754 8.221144e+06 3.878945e+07 -2087.609472 961.077836
639979 1270441 34766570 540 11816 youtube 2391.630592 9964.369408 -37.862327 18.549382 6.302100e+06 2.973491e+07 -2004.326786 922.736784
49520 1910405 39960654 682 15642 a 3159.677710 13164.322290 -44.078156 21.594620 7.322349e+06 3.454871e+07 -1999.991245 920.740820
In [71]:
fig = px.bar(graph_interactions_nonmedia1, x="res_inter_nonmedia", y="mot", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightyellow", opacity=0.75, textfont_size=12)
fig

bigrammes

In [2]:
media2 = pan.read_csv("france-bigrammes-media-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
nonmedia2 = pan.read_csv("france-bigrammes-nonmedia-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
In [3]:
media2
Out[3]:
bigramme interactions
0 monde jungle 496340
1 jungle miroir 496340
2 miroir filmer 496340
3 filmer réaction 496340
4 réaction animal 496340
... ... ...
8751501 mvp 34 480
8751502 nord littoral 455
8751503 littoral disponible 455
8751504 disponible version 455
8751505 version digital 455

8751506 rows × 2 columns

In [4]:
nonmedia2
Out[4]:
bigramme interactions
0 thevoice histoire 480966
1 histoire rencontre 480966
2 rencontre musical 480966
3 musical talent 480966
4 talent coach 480966
... ... ...
36825961 nouvelle privatisation 411
36825962 privatisation route 411
36825963 route national 411
36825964 national cliquer 411
36825965 cliquer ici 411

36825966 rows × 2 columns

In [5]:
media2_table = pan.pivot_table(media2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
nonmedia2_table = pan.pivot_table(nonmedia2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
In [7]:
media2_table
Out[7]:
len sum
interactions interactions
bigramme
#12h45 dominique 1 607
#a25 convoi 1 585
#accidentdelaroute lens 1 574
#assesrfc cdf 1 1073
#bonne idée 1 1583
... ... ...
시i spect 1 2465
시다 equipe 1 3330
어오에 louvier 1 399
주내6 beta 1 2099
주지 আn8 1 2338

3231502 rows × 2 columns

In [8]:
nonmedia2_table
Out[8]:
len sum
interactions interactions
bigramme
# panafricanteam 1 1056
# photo 1 5923
# teamhac 1 1260
# timeline 1 516
#100kfollower iyi 1 1300
... ... ...
𖡡𖡡𖡡 contact 1 1167
🛕histoir savoir 1 534
🛖 beaucoup 1 1294
🛖 elobi 1 896
􏰒􏰄􏰃􏰁􏰉dever affluer 1 904

11954116 rows × 2 columns

In [9]:
media2_table["type"] = "media"
nonmedia2_table["type"] = "non-media"
In [10]:
media2_table
Out[10]:
len sum type
interactions interactions
bigramme
#12h45 dominique 1 607 media
#a25 convoi 1 585 media
#accidentdelaroute lens 1 574 media
#assesrfc cdf 1 1073 media
#bonne idée 1 1583 media
... ... ... ...
시i spect 1 2465 media
시다 equipe 1 3330 media
어오에 louvier 1 399 media
주내6 beta 1 2099 media
주지 আn8 1 2338 media

3231502 rows × 3 columns

In [11]:
nonmedia2_table
Out[11]:
len sum type
interactions interactions
bigramme
# panafricanteam 1 1056 non-media
# photo 1 5923 non-media
# teamhac 1 1260 non-media
# timeline 1 516 non-media
#100kfollower iyi 1 1300 non-media
... ... ... ...
𖡡𖡡𖡡 contact 1 1167 non-media
🛕histoir savoir 1 534 non-media
🛖 beaucoup 1 1294 non-media
🛖 elobi 1 896 non-media
􏰒􏰄􏰃􏰁􏰉dever affluer 1 904 non-media

11954116 rows × 3 columns

In [12]:
tableau2 = pan.concat([media2_table,nonmedia2_table], names = ["bigramme","nb","interactions","type"], levels=0)
In [13]:
tableau2
Out[13]:
len sum type
interactions interactions
bigramme
#12h45 dominique 1 607 media
#a25 convoi 1 585 media
#accidentdelaroute lens 1 574 media
#assesrfc cdf 1 1073 media
#bonne idée 1 1583 media
... ... ... ...
𖡡𖡡𖡡 contact 1 1167 non-media
🛕histoir savoir 1 534 non-media
🛖 beaucoup 1 1294 non-media
🛖 elobi 1 896 non-media
􏰒􏰄􏰃􏰁􏰉dever affluer 1 904 non-media

15185618 rows × 3 columns

In [14]:
tableau2["bigramme"] = tableau2.index
tableau2.columns = ["nb","interactions","media","bigramme"]
tableau2.reset_index(drop=True, inplace=True)
tableau2
Out[14]:
nb interactions media bigramme
0 1 607 media #12h45 dominique
1 1 585 media #a25 convoi
2 1 574 media #accidentdelaroute lens
3 1 1073 media #assesrfc cdf
4 1 1583 media #bonne idée
... ... ... ... ...
15185613 1 1167 non-media 𖡡𖡡𖡡 contact
15185614 1 534 non-media 🛕histoir savoir
15185615 1 1294 non-media 🛖 beaucoup
15185616 1 896 non-media 🛖 elobi
15185617 1 904 non-media 􏰒􏰄􏰃􏰁􏰉dever affluer

15185618 rows × 4 columns

In [15]:
khi2_2 = pan.pivot_table(tableau2,index=["bigramme"],columns=["media"],values=["bigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [16]:
khi2_2
Out[16]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
bigramme
# panafricanteam 0 1 0 1 0 1056 0 1
# photo 0 1 0 1 0 5923 0 1
# teamhac 0 1 0 1 0 1260 0 1
# timeline 0 1 0 1 0 516 0 1
#100kfollower iyi 0 1 0 1 0 1300 0 1
... ... ... ... ... ... ... ... ...
𖡡𖡡𖡡 contact 0 1 0 1 0 1167 0 1
🛕histoir savoir 0 1 0 1 0 534 0 1
🛖 beaucoup 0 1 0 1 0 1294 0 1
🛖 elobi 0 1 0 1 0 896 0 1
􏰒􏰄􏰃􏰁􏰉dever affluer 0 1 0 1 0 904 0 1

13711286 rows × 8 columns

In [17]:
khi2_2["bigramme"] = khi2_2.index
khi2_2.columns = khi2_2.columns.get_level_values(0)
khi2_2.reset_index(drop=True, inplace=True)
khi2_2.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","bigramme"]
khi2_2 = khi2_2.drop(columns=["a","b","c","d"])
khi2_2
Out[17]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme
0 0 1056 0 1 # panafricanteam
1 0 5923 0 1 # photo
2 0 1260 0 1 # teamhac
3 0 516 0 1 # timeline
4 0 1300 0 1 #100kfollower iyi
... ... ... ... ... ...
13711281 0 1167 0 1 𖡡𖡡𖡡 contact
13711282 0 534 0 1 🛕histoir savoir
13711283 0 1294 0 1 🛖 beaucoup
13711284 0 896 0 1 🛖 elobi
13711285 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer

13711286 rows × 5 columns

In [18]:
khi2_2.obs_media.sum(), khi2_2.obs_nonmedia.sum()
Out[18]:
(8751506, 36825966)
In [19]:
print("Nb de lignes fichier media2 = ", media2.shape[0])
print("Somme observée média khi2_2 = ", khi2_2.obs_media.sum())

print("Nb de lignes  fichier nonmedia2 = ", nonmedia2.shape[0])
print("Somme observée nonmédia khi2_2 = ", khi2_2.obs_nonmedia.sum())
Nb de lignes fichier media2 =  8751506
Somme observée média khi2_2 =  8751506
Nb de lignes  fichier nonmedia2 =  36825966
Somme observée nonmédia khi2_2 =  36825966
In [20]:
khi2_2["exp_media"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_media.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2["exp_nonmedia"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_nonmedia.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2
Out[20]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia
0 0 1056 0 1 # panafricanteam 0.192014 0.807986
1 0 5923 0 1 # photo 0.192014 0.807986
2 0 1260 0 1 # teamhac 0.192014 0.807986
3 0 516 0 1 # timeline 0.192014 0.807986
4 0 1300 0 1 #100kfollower iyi 0.192014 0.807986
... ... ... ... ... ... ... ...
13711281 0 1167 0 1 𖡡𖡡𖡡 contact 0.192014 0.807986
13711282 0 534 0 1 🛕histoir savoir 0.192014 0.807986
13711283 0 1294 0 1 🛖 beaucoup 0.192014 0.807986
13711284 0 896 0 1 🛖 elobi 0.192014 0.807986
13711285 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer 0.192014 0.807986

13711286 rows × 7 columns

In [21]:
khi2_2["res_media"] = (khi2_2.obs_media - khi2_2.exp_media) / np.sqrt(khi2_2.exp_media)
khi2_2["res_nonmedia"] = (khi2_2.obs_nonmedia - khi2_2.exp_nonmedia) / np.sqrt(khi2_2.exp_nonmedia)
khi2_2
Out[21]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 1056 0 1 # panafricanteam 0.192014 0.807986 -0.438194 0.213614
1 0 5923 0 1 # photo 0.192014 0.807986 -0.438194 0.213614
2 0 1260 0 1 # teamhac 0.192014 0.807986 -0.438194 0.213614
3 0 516 0 1 # timeline 0.192014 0.807986 -0.438194 0.213614
4 0 1300 0 1 #100kfollower iyi 0.192014 0.807986 -0.438194 0.213614
... ... ... ... ... ... ... ... ... ...
13711281 0 1167 0 1 𖡡𖡡𖡡 contact 0.192014 0.807986 -0.438194 0.213614
13711282 0 534 0 1 🛕histoir savoir 0.192014 0.807986 -0.438194 0.213614
13711283 0 1294 0 1 🛖 beaucoup 0.192014 0.807986 -0.438194 0.213614
13711284 0 896 0 1 🛖 elobi 0.192014 0.807986 -0.438194 0.213614
13711285 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer 0.192014 0.807986 -0.438194 0.213614

13711286 rows × 9 columns

In [22]:
graph_media2 = khi2_2.sort_values(by=["res_media"],ascending=False).head(50)
graph_media2
Out[22]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
4687647 15404479 69437 3121 8 equipe timeline 600.811345 2528.188655 102.816778 -50.121991
3997070 10645201 4715817 5418 2816 donald trump 1581.042064 6652.957936 96.497336 -47.041336
8660274 5576673 2079760 3496 1332 nouveau cas 927.042881 3900.957119 84.373671 -41.131189
4489487 21993617 31262248 9867 13743 emmanuel macron 4533.447065 19076.552935 79.214071 -38.615944
306290 6657823 3902233 3311 1744 24 heure 970.630026 4084.369974 75.120359 -36.620307
2404226 3147927 1037159 2229 695 cas covid-19 561.448506 2362.551494 70.375953 -34.307464
9699151 8007213 12320551 5379 5718 port masque 2130.777725 8966.222275 70.368236 -34.303702
13544236 6809076 5769085 3786 2856 épidémie coronavirus 1275.356010 5366.643990 70.302281 -34.271550
6789760 7187866 2968655 2686 1347 joe biden 774.391868 3258.608132 68.693948 -33.487506
12282764 5505174 4297431 3099 2000 tester positif 979.078635 4119.921365 67.750260 -33.027469
2404183 3846260 995873 2123 760 cas coronavirus 553.575938 2329.424062 66.703973 -32.517416
5567761 6446984 5682654 3764 3219 garde vue 1340.832734 5642.167266 66.175326 -32.259707
5379461 5238599 952747 1758 466 france 24 427.038809 1796.961191 64.406800 -31.397570
7950590 3772420 2433760 2449 1421 mettre examen 743.093610 3126.906390 62.579694 -30.506877
2320644 11221007 932468 1510 351 canal sport 357.337780 1503.662220 60.976514 -29.725346
7769521 4695489 5042460 3176 2763 masque obligatoire 1140.370272 4798.629728 60.280361 -29.385979
7756227 1208701 19151 1085 16 martinique premier 211.407252 889.592748 60.082650 -29.289597
7536217 1079410 50157 1101 44 maine libre 219.855861 925.144139 59.426196 -28.969584
9728217 3646542 3149202 2304 1446 positif covid-19 720.051948 3029.948052 59.028129 -28.775531
10757774 4404775 3069084 2266 1467 retrouver mort 716.787713 3016.212287 57.864960 -28.208499
2485448 3501491 4121783 2858 2634 centre ville 1054.540080 4437.459920 55.536045 -27.073180
860599 8846232 1565338 1800 963 alerte info 530.534275 2232.465725 55.114295 -26.867582
2404055 2595970 811523 1542 637 cas confirmer 418.398185 1760.601815 54.931004 -26.778230
3880047 2699190 889091 1194 265 direct coronavirus 280.148211 1178.851789 54.598642 -26.616207
3266261 5380513 5005858 2734 2508 contre covid-19 1006.536616 4235.463384 54.449521 -26.543512
1187690 3152301 2210158 1793 1019 appel témoin 539.942954 2272.057046 53.925838 -26.288223
13544274 4056756 3184793 2186 1660 épidémie covid-19 738.485278 3107.514722 53.266243 -25.966678
6741882 4871465 5932581 2852 2896 jean castex 1103.695626 4644.304374 52.625033 -25.654095
7074868 3410752 3582844 2075 1562 lancer appel 698.354383 2938.645617 52.093582 -25.395019
642007 1408075 11435 805 11 actualité international 156.683304 659.316696 51.793570 -25.248767
6774923 4280658 8309157 3121 3553 jeune homme 1281.500454 5392.499546 51.385466 -25.049821
3327232 3784464 1251623 1401 627 coronavirus france 389.404093 1638.595907 51.263329 -24.990280
9532698 3334991 2477932 2004 1508 placer garde 674.352651 2837.647349 51.202709 -24.960729
748567 1262159 786492 1195 401 agence régional 306.454109 1289.545891 50.757139 -24.743519
11045458 1248466 742992 1183 393 régional santé 302.613832 1273.386168 50.609124 -24.671363
8661185 2514285 1064243 1401 659 nouveau coronavirus 395.548537 1664.451463 50.554663 -24.644814
10113732 2430861 897959 1325 624 président américain 374.234999 1574.765001 49.147450 -23.958814
2406958 1429779 1296666 1301 643 cas positif 373.274930 1570.725070 48.018090 -23.408264
9728174 2838177 1970592 1467 948 positif coronavirus 463.713455 1951.286545 46.590797 -22.712475
3406990 920480 265802 881 218 courrier ouest 211.023224 887.976776 46.120565 -22.483243
8661992 1254306 273830 908 250 nouveau décès 222.352042 935.647958 45.981234 -22.415320
56200 5727990 10065696 3059 4075 11 mai 1369.826826 5764.173174 45.639549 -22.248753
2483815 1938608 2092498 1722 1424 centre hospitalier 604.075581 2541.924419 45.484854 -22.173340
3744236 1325841 241366 822 185 dernier 24 193.357950 813.642050 45.208741 -22.038739
8045790 4673819 4167067 2096 2137 ministre santé 812.794639 3420.205361 45.009662 -21.941690
10929550 3241472 3592667 1949 1902 royaume uni 739.445347 3111.554653 44.480786 -21.683869
1025999 3148432 1154962 1147 602 an mourir 335.832229 1413.167771 44.263882 -21.578131
6845861 1105265 77588 599 20 journal afrique 118.856575 500.143425 44.041224 -21.469588
3266192 5837415 7744376 2340 2728 contre coronavirus 973.126206 4094.873794 43.817120 -21.360340
11259439 5574303 6739925 2203 2521 samuel paty 907.073441 3816.926559 43.028793 -20.976039
In [23]:
fig = px.bar(graph_media2, x="res_media", y="bigramme", labels={"res_media":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="navy", opacity=0.75, textfont_size=12)
fig
In [24]:
graph_nonmedia2 = khi2_2.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia2
Out[24]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
9439871 3429659 120614316 1785 68477 photo from 13491.277326 56770.722674 -100.784158 49.131112
12340659 32496923 138063120 8166 59991 timeline photo 13087.088166 55069.911834 -43.016952 20.970267
9825542 863821 32133225 421 10730 prendre soin 2141.146473 9009.853527 -37.174252 18.122018
1992785 119621 20184638 50 6028 bon soirée 1167.060197 4910.939803 -32.698659 15.940219
9390876 0 3664993 0 5425 pet alert 1041.675151 4383.324849 -32.274993 15.733686
12395759 0 8663729 0 5165 tonton jo 991.751550 4173.248450 -31.492087 15.352028
12659817 16902 13089787 10 4493 twitter for 864.638379 3638.361621 -29.064653 14.168682
11544746 562 2774595 1 4144 silhouette normal 795.897420 3349.102580 -28.176208 13.735576
8629888 0 2715484 0 4075 normal taille 782.456450 3292.543550 -27.972423 13.636233
6863388 772394 22636611 163 5429 joyeux anniversaire 1073.741465 4518.258535 -27.793628 13.549073
1990166 459031 17325632 315 6237 bon journée 1258.074764 5293.925236 -26.588443 12.961558
5293106 8925 10013989 5 3421 for iphone 657.839460 2768.160540 -25.453437 12.408256
6793655 1085873 8378779 143 4462 johnny hallyday 884.223792 3720.776208 -24.926899 12.151575
1987755 504335 13545189 189 4715 bon chance 941.635934 3962.364066 -24.526944 11.956602
2656011 406142 10705354 183 4650 chien chat 928.002951 3904.997049 -24.455881 11.921959
8802400 868334 8485731 86 3635 of the 714.483546 3006.516454 -23.512445 11.462045
6929604 19893 4028522 12 2965 jésus christ 571.625240 2405.374760 -23.406776 11.410533
2834942 35612 6189844 29 3069 cliquer lien 594.858916 2503.141084 -23.200705 11.310075
531612 65470 6582243 30 3029 abonner vous 587.370376 2471.629624 -22.997883 11.211202
1787169 124620 13213939 53 3214 bel soirée 627.309257 2639.690743 -22.930048 11.178133
1785343 318492 17623246 360 5515 bel journée 1128.081385 4746.918615 -22.868465 11.148113
3706853 98996 7529539 60 3219 demain appartenir 629.613423 2649.386577 -22.700908 11.066430
859126 0 1767591 0 2627 alert pet 504.420391 2122.579609 -22.459305 10.948652
9618360 562 1858211 1 2625 poil court 504.228377 2121.771623 -22.410497 10.924858
10609896 585052 12869823 369 5384 rendez vou 1104.655695 4648.344305 -22.134063 10.790100
12347514 692429 19109985 222 4347 tirage sort 877.311293 3691.688707 -22.124364 10.785372
3221009 4549 1718803 5 2542 contact 06 489.059283 2057.940717 -21.888591 10.670435
2595137 202648 7556648 105 3356 chat chien 664.559945 2796.440055 -21.705987 10.581418
11795554 0 9301712 0 2408 souvien vidéo 462.369358 1945.630642 -21.502776 10.482355
12134133 49928 1798994 16 2513 taille moyen 485.603034 2043.396966 -21.310331 10.388540
9813234 12838 4014805 4 2383 premier chaîne 458.337067 1928.662933 -21.221969 10.345465
2615227 1312 3876077 1 2332 chaîne réinformation 447.968319 1885.031681 -21.118015 10.294788
9571322 2499 2164757 1 2301 plateau albion 442.015889 1859.984111 -20.976610 10.225855
12655077 1312 3868975 1 2299 tvlibertés premier 441.631861 1858.368139 -20.967454 10.221391
9703630 2201 1649652 4 2297 porte collier 441.823875 1859.176125 -20.829308 10.154047
3488306 5225889 53841114 741 7370 cristiano ronaldo 1557.424360 6553.575640 -20.687713 10.085021
11818052 1218 5150009 1 2237 sponsoriser facebook 429.727003 1808.272997 -20.681618 10.082050
1988543 101569 8087411 90 3002 bon dimanche 593.706833 2498.293167 -20.672444 10.077577
8175543 6903 2090780 7 2281 mont serein 439.327695 1848.672305 -20.626178 10.055024
1989811 227060 7476309 110 3130 bon humeur 622.124883 2617.875117 -20.532276 10.009247
12134289 11764 1583221 8 2252 taille petit 433.951307 1826.048693 -20.447464 9.967902
5506270 0 4080311 0 2158 féminin 100 414.365894 1743.634106 -20.355979 9.923305
42177 470 4079008 1 2158 100 social 414.557908 1744.442092 -20.311581 9.901661
9672086 374434 9187528 200 3737 pomme terre 755.958539 3181.041461 -20.220571 9.857295
13406506 0 3955838 0 2083 éclaireur éclaireur 399.964855 1683.035145 -19.999121 9.749341
11747974 140732 6987133 84 2783 souhaite bon 550.503716 2316.496284 -19.882683 9.692578
8175627 116229 3156039 41 2414 mont ventoux 471.394009 1983.605991 -19.823219 9.663590
1993646 187415 8119280 73 2607 bon week 514.597126 2165.402874 -19.466710 9.489796
1991116 12090 5319872 12 2066 bon nuit 399.004786 1678.995214 -19.374356 9.444775
10449107 55922 4377236 25 2169 recette cuisine 421.278393 1772.721607 -19.307045 9.411961
In [25]:
fig = px.bar(graph_nonmedia2, x="res_nonmedia", y="bigramme", labels={"res_nonmedia":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="aqua", opacity=0.75, textfont_size=12)
fig
In [26]:
khi2_2["exp_inter_media"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_media.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2["exp_inter_nonmedia"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_nonmedia.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2
Out[26]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 1056 0 1 # panafricanteam 0.192014 0.807986 -0.438194 0.213614 183.228032 872.771968
1 0 5923 0 1 # photo 0.192014 0.807986 -0.438194 0.213614 1027.707986 4895.292014
2 0 1260 0 1 # teamhac 0.192014 0.807986 -0.438194 0.213614 218.624356 1041.375644
3 0 516 0 1 # timeline 0.192014 0.807986 -0.438194 0.213614 89.531879 426.468121
4 0 1300 0 1 #100kfollower iyi 0.192014 0.807986 -0.438194 0.213614 225.564812 1074.435188
... ... ... ... ... ... ... ... ... ... ... ...
13711281 0 1167 0 1 𖡡𖡡𖡡 contact 0.192014 0.807986 -0.438194 0.213614 202.487797 964.512203
13711282 0 534 0 1 🛕histoir savoir 0.192014 0.807986 -0.438194 0.213614 92.655084 441.344916
13711283 0 1294 0 1 🛖 beaucoup 0.192014 0.807986 -0.438194 0.213614 224.523744 1069.476256
13711284 0 896 0 1 🛖 elobi 0.192014 0.807986 -0.438194 0.213614 155.466209 740.533791
13711285 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer 0.192014 0.807986 -0.438194 0.213614 156.854300 747.145700

13711286 rows × 11 columns

In [27]:
khi2_2["res_inter_media"] = (khi2_2.interactions_media - khi2_2.exp_inter_media) / np.sqrt(khi2_2.exp_inter_media)
khi2_2["res_inter_nonmedia"] = (khi2_2.interactions_nonmedia - khi2_2.exp_inter_nonmedia) / np.sqrt(khi2_2.exp_inter_nonmedia)
khi2_2
/home/jhroy/.local/lib/python3.7/site-packages/pandas/core/arraylike.py:358: RuntimeWarning:

invalid value encountered in sqrt

Out[27]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 1056 0 1 # panafricanteam 0.192014 0.807986 -0.438194 0.213614 183.228032 872.771968 -13.536175 6.202139
1 0 5923 0 1 # photo 0.192014 0.807986 -0.438194 0.213614 1027.707986 4895.292014 -32.057885 14.688601
2 0 1260 0 1 # teamhac 0.192014 0.807986 -0.438194 0.213614 218.624356 1041.375644 -14.785951 6.774774
3 0 516 0 1 # timeline 0.192014 0.807986 -0.438194 0.213614 89.531879 426.468121 -9.462129 4.335452
4 0 1300 0 1 #100kfollower iyi 0.192014 0.807986 -0.438194 0.213614 225.564812 1074.435188 -15.018815 6.881470
... ... ... ... ... ... ... ... ... ... ... ... ... ...
13711281 0 1167 0 1 𖡡𖡡𖡡 contact 0.192014 0.807986 -0.438194 0.213614 202.487797 964.512203 -14.229821 6.519961
13711282 0 534 0 1 🛕histoir savoir 0.192014 0.807986 -0.438194 0.213614 92.655084 441.344916 -9.625751 4.410422
13711283 0 1294 0 1 🛖 beaucoup 0.192014 0.807986 -0.438194 0.213614 224.523744 1069.476256 -14.984116 6.865572
13711284 0 896 0 1 🛖 elobi 0.192014 0.807986 -0.438194 0.213614 155.466209 740.533791 -12.468609 5.712991
13711285 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer 0.192014 0.807986 -0.438194 0.213614 156.854300 747.145700 -12.524149 5.738439

13711286 rows × 13 columns

In [28]:
graph_interactions_media2 = khi2_2.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media2
Out[28]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
4687647 15404479 69437 3121 8 equipe timeline 600.811345 2528.188655 102.816778 -50.121991 2.684901e+06 1.278902e+07 7762.624795 -3556.756706
2320644 11221007 932468 1510 351 canal sport 357.337780 1503.662220 60.976514 -29.725346 2.108766e+06 1.004471e+07 6274.959738 -2875.123521
860599 8846232 1565338 1800 963 alerte info 530.534275 2232.465725 55.114295 -26.867582 1.806526e+06 8.605044e+06 5237.600985 -2399.816160
12214781 10481091 3714583 464 993 temps là 279.764184 1177.235816 11.014835 -5.369605 2.463111e+06 1.173256e+07 5108.847636 -2340.822669
3997070 10645201 4715817 5418 2816 donald trump 1581.042064 6652.957936 96.497336 -47.041336 2.665312e+06 1.269571e+07 4887.906292 -2239.589565
4489487 21993617 31262248 9867 13743 emmanuel macron 4533.447065 19076.552935 79.214071 -38.615944 9.240499e+06 4.401537e+07 4195.354315 -1922.269205
6789760 7187866 2968655 2686 1347 joe biden 774.391868 3258.608132 68.693948 -33.487506 1.762272e+06 8.394249e+06 4087.057974 -1872.648910
5379461 5238599 952747 1758 466 france 24 427.038809 1796.961191 64.406800 -31.397570 1.074269e+06 5.117077e+06 4017.802577 -1840.916783
11060430 4149086 439405 135 169 rémy buisine 58.372211 245.627789 10.029591 -4.889310 7.961555e+05 3.792336e+06 3757.730363 -1721.754306
5678959 5222425 1587108 986 587 gmt canal 302.037790 1270.962210 39.355140 -19.185176 1.181532e+06 5.628001e+06 3717.532158 -1703.335893
8660274 5576673 2079760 3496 1332 nouveau cas 927.042881 3900.957119 84.373671 -41.131189 1.328478e+06 6.327955e+06 3685.760945 -1688.778642
306290 6657823 3902233 3311 1744 24 heure 970.630026 4084.369974 75.120359 -36.620307 1.832290e+06 8.727766e+06 3564.906771 -1633.404473
1781829 4012793 676834 681 387 bein sport 205.070795 862.929205 33.234599 -16.201483 8.137037e+05 3.875923e+06 3546.444851 -1624.945407
2173961 2859114 45700 56 37 buisine direct 17.857288 75.142712 9.026176 -4.400157 5.040183e+05 2.400796e+06 3317.304905 -1519.955785
2404183 3846260 995873 2123 760 cas coronavirus 553.575938 2329.424062 66.703973 -32.517416 8.401652e+05 4.001968e+06 3279.595707 -1502.677810
5627285 6519478 4989406 1884 2044 george floyd 754.230414 3173.769586 41.137497 -20.054054 1.996923e+06 9.511961e+06 3200.392905 -1466.387882
13544236 6809076 5769085 3786 2856 épidémie coronavirus 1275.356010 5366.643990 70.302281 -34.271550 2.182454e+06 1.039571e+07 3131.781262 -1434.950716
3327232 3784464 1251623 1401 627 coronavirus france 389.404093 1638.595907 51.263329 -24.990280 8.738185e+05 4.162269e+06 3113.713907 -1426.672435
8476035 2649766 210436 241 104 nature fr 66.244779 278.755221 21.471102 -10.466914 4.962776e+05 2.363924e+06 3056.892576 -1400.637472
5567761 6446984 5682654 3764 3219 garde vue 1340.832734 5642.167266 66.175326 -32.259707 2.104630e+06 1.002501e+07 2993.210544 -1371.459005
11114239 2785856 453104 716 173 résumé vidéo 170.700315 718.299685 41.736698 -20.346158 5.619965e+05 2.676964e+06 2966.474101 -1359.208635
12282764 5505174 4297431 3099 2000 tester positif 979.078635 4119.921365 67.750260 -33.027469 1.700864e+06 8.101741e+06 2917.031958 -1336.554742
12762904 2158501 0 350 0 urgent timeline 67.204848 282.795152 34.496232 -16.816514 3.745245e+05 1.783976e+06 2915.070071 -1335.655825
307557 2158501 9391 350 8 24 urgent 68.740959 289.259041 33.923347 -16.537240 3.761540e+05 1.791738e+06 2906.092595 -1331.542436
2404226 3147927 1037159 2229 695 cas covid-19 561.448506 2362.551494 70.375953 -34.307464 7.261601e+05 3.458926e+06 2841.948119 -1302.152081
1025999 3148432 1154962 1147 602 an mourir 335.832229 1413.167771 44.263882 -21.578131 7.466879e+05 3.556706e+06 2779.439209 -1273.511126
10757774 4404775 3069084 2266 1467 retrouver mort 716.787713 3016.212287 57.864960 -28.208499 1.296800e+06 6.177059e+06 2729.236119 -1250.508575
3266261 5380513 5005858 2734 2508 contre covid-19 1006.536616 4235.463384 54.449521 -26.543512 1.802154e+06 8.584217e+06 2665.557340 -1221.331598
3880047 2699190 889091 1194 265 direct coronavirus 280.148211 1178.851789 54.598642 -26.616207 6.226076e+05 2.965673e+06 2631.733682 -1205.833938
2404055 2595970 811523 1542 637 cas confirmer 418.398185 1760.601815 54.931004 -26.778230 5.912389e+05 2.816254e+06 2607.201822 -1194.593686
2004476 1735143 12448 391 9 bonjour météo 76.805541 323.194459 35.851080 -17.476987 3.032270e+05 1.444364e+06 2600.361083 -1191.459328
7950590 3772420 2433760 2449 1421 mettre examen 743.093610 3126.906390 62.579694 -30.506877 1.076843e+06 5.129337e+06 2597.619694 -1190.203251
2154223 1695735 28874 70 17 brut rencontrer 16.705205 70.294795 13.039438 -6.356576 2.992393e+05 1.425370e+06 2552.879264 -1169.703635
2154082 2668086 995576 243 188 brut nature 82.757971 348.242029 17.614549 -8.586889 6.356871e+05 3.027975e+06 2549.102223 -1167.973033
8045790 4673819 4167067 2096 2137 ministre santé 812.794639 3420.205361 45.009662 -21.941690 1.533994e+06 7.306892e+06 2535.090605 -1161.553051
13544274 4056756 3184793 2186 1660 épidémie covid-19 738.485278 3107.514722 53.266243 -25.966678 1.256491e+06 5.985058e+06 2498.154854 -1144.629460
8227457 2708513 1191619 948 653 mort george 307.414178 1293.585822 36.535527 -17.810647 6.767173e+05 3.223415e+06 2469.882972 -1131.675568
8226125 2106712 514973 679 229 mort coronavirus 174.348578 733.651422 38.219262 -18.631448 4.548922e+05 2.166793e+06 2449.110660 -1122.157904
10113732 2430861 897959 1325 624 président américain 374.234999 1574.765001 49.147450 -23.958814 5.775882e+05 2.751232e+06 2438.541757 -1117.315338
8661185 2514285 1064243 1401 659 nouveau coronavirus 395.548537 1664.451463 50.554663 -24.644814 6.209154e+05 2.957613e+06 2402.808695 -1100.942808
9699151 8007213 12320551 5379 5718 port masque 2130.777725 8966.222275 70.368236 -34.303702 3.527099e+06 1.680067e+07 2385.504791 -1093.014333
13031197 6671510 9166418 2289 3730 violence policier 1155.731380 4863.268620 33.335303 -16.250575 2.748061e+06 1.308987e+07 2366.763343 -1084.427190
11259439 5574303 6739925 2203 2521 samuel paty 907.073441 3816.926559 43.028793 -20.976039 2.136659e+06 1.017757e+07 2351.761747 -1077.553609
12221384 2498812 1137747 644 516 tempête alex 222.736069 937.263931 28.226628 -13.760155 6.309844e+05 3.005575e+06 2351.405151 -1077.390221
642007 1408075 11435 805 11 actualité international 156.683304 659.316696 51.793570 -25.248767 2.463012e+05 1.173209e+06 2340.929670 -1072.590460
9532698 3334991 2477932 2004 1508 placer garde 674.352651 2837.647349 51.202709 -24.960729 1.008608e+06 4.804315e+06 2316.433631 -1061.366622
7769521 4695489 5042460 3176 2763 masque obligatoire 1140.370272 4798.629728 60.280361 -29.385979 1.689645e+06 8.048304e+06 2312.430426 -1059.532393
1187690 3152301 2210158 1793 1019 appel témoin 539.942954 2272.057046 53.925838 -26.288223 9.304477e+05 4.432011e+06 2303.400104 -1055.394790
10326835 3048974 2088877 272 647 raconte histoire 176.460731 742.539269 7.192136 -3.506083 8.914757e+05 4.246375e+06 2285.050026 -1046.986969
8150166 1321030 4089 12 3 monde jungle 2.880208 12.119792 5.373695 -2.619614 2.299232e+05 1.095196e+06 2275.494576 -1042.608758
In [29]:
fig = px.bar(graph_interactions_media2, x="res_inter_media", y="bigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [30]:
graph_interactions_nonmedia2 = khi2_2.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia2
Out[30]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
9825542 863821 32133225 421 10730 prendre soin 2141.146473 9009.853527 -37.174252 18.122018 5.725363e+06 2.727168e+07 -2031.760681 930.932335
1992785 119621 20184638 50 6028 bon soirée 1167.060197 4910.939803 -32.698659 15.940219 3.523020e+06 1.678124e+07 -1813.240168 830.808431
6863388 772394 22636611 163 5429 joyeux anniversaire 1073.741465 4518.258535 -27.793628 13.549073 4.061729e+06 1.934728e+07 -1632.122084 747.821944
1785343 318492 17623246 360 5515 bel journée 1128.081385 4746.918615 -22.868465 11.148113 3.113096e+06 1.482864e+07 -1583.886353 725.720817
3488306 5225889 53841114 741 7370 cristiano ronaldo 1557.424360 6553.575640 -20.687713 10.085021 1.024880e+07 4.881820e+07 -1568.985193 718.893255
13117942 72388 14194182 44 1700 voir vidéo 334.872159 1409.127841 -15.895077 7.748666 2.475412e+06 1.179116e+07 -1527.335351 699.809716
12659817 16902 13089787 10 4493 twitter for 864.638379 3638.361621 -29.064653 14.168682 2.274160e+06 1.083253e+07 -1496.823803 685.829631
1990166 459031 17325632 315 6237 bon journée 1258.074764 5293.925236 -26.588443 12.961558 3.085842e+06 1.469882e+07 -1495.346890 685.152924
12347514 692429 19109985 222 4347 tirage sort 877.311293 3691.688707 -22.124364 10.785372 3.435944e+06 1.636647e+07 -1480.077116 678.156467
1787169 124620 13213939 53 3214 bel soirée 627.309257 2639.690743 -22.930048 11.178133 2.314392e+06 1.102417e+07 -1439.396475 659.517006
10822014 0 11259672 0 1288 rire chanson 247.313842 1040.686158 -15.726215 7.666348 1.953681e+06 9.305991e+06 -1397.741530 640.431129
10157637 0 10987428 0 384 ptdr comique 73.733319 310.266681 -8.586811 4.185971 1.906444e+06 9.080984e+06 -1380.740362 632.641365
8126535 0 10987428 0 384 mojito whisky 73.733319 310.266681 -8.586811 4.185971 1.906444e+06 9.080984e+06 -1380.740362 632.641365
3533096 343204 14063248 111 1442 crédit vidéo 298.197513 1254.802487 -10.840469 5.284603 2.499684e+06 1.190677e+07 -1363.963755 624.954492
5293106 8925 10013989 5 3421 for iphone 657.839460 2768.160540 -25.453437 12.408256 1.739090e+06 8.283824e+06 -1311.977737 601.135020
11795554 0 9301712 0 2408 souvien vidéo 462.369358 1945.630642 -21.502776 10.482355 1.613953e+06 7.687759e+06 -1270.414505 582.091165
1987755 504335 13545189 189 4715 bon chance 941.635934 3962.364066 -24.526944 11.956602 2.437752e+06 1.161177e+07 -1238.314160 567.383109
13433451 4645 8854546 1 674 écouter rire 129.609351 545.390649 -11.296772 5.507046 1.537171e+06 7.322020e+06 -1236.080343 566.359597
12395759 0 8663729 0 5165 tonton jo 991.751550 4173.248450 -31.492087 15.352028 1.503256e+06 7.160473e+06 -1226.073283 561.774462
9149892 416 8083754 1 658 partage autoriser 126.537129 532.462871 -11.159975 5.440359 1.402696e+06 6.681474e+06 -1184.003264 542.498402
7684266 962 7978675 1 171 mardi 2105 33.026383 138.973617 -5.572850 2.716700 1.384558e+06 6.595079e+06 -1175.854841 538.764877
10609896 585052 12869823 369 5384 rendez vou 1104.655695 4648.344305 -22.134063 10.790100 2.334574e+06 1.112030e+07 -1145.026674 524.639720
7796111 5917 7537930 6 524 mathieu rivrin 101.767342 428.232658 -9.493213 4.627832 1.308943e+06 6.234904e+06 -1138.918854 521.841179
7870507 1422340 19113517 263 2764 meilleur joueur 581.225932 2445.774068 -13.199670 6.434687 3.563205e+06 1.697265e+07 -1134.145835 519.654229
12878945 45970 7778733 10 235 venir marquer 47.043394 197.956606 -5.400842 2.632848 1.357675e+06 6.467028e+06 -1125.740485 515.802982
11575353 574577 12437006 377 3404 site internet 726.004377 3054.995623 -12.952728 6.314305 2.257658e+06 1.075393e+07 -1120.149334 513.241173
1988543 101569 8087411 90 3002 bon dimanche 593.706833 2498.293167 -20.672444 10.077577 1.420881e+06 6.768099e+06 -1106.798898 507.124138
8492969 0 7020414 0 390 naître brûler 74.885403 315.114597 -8.653635 4.218547 1.218122e+06 5.802292e+06 -1103.685562 505.697638
6489952 2725 7037784 3 367 instagram snapchat 71.045126 298.954874 -8.072906 3.935448 1.221609e+06 5.818900e+06 -1102.798535 505.291211
2165163 0 6988861 0 344 brûler gomme 66.052765 277.947235 -8.127285 3.961958 1.212647e+06 5.776214e+06 -1101.202531 504.559938
5691157 0 6987431 0 342 gomme instagram 65.668738 276.331262 -8.103625 3.950424 1.212399e+06 5.775032e+06 -1101.089866 504.508316
8252609 0 6985873 0 342 moto cher 65.668738 276.331262 -8.103625 3.950424 1.212129e+06 5.773744e+06 -1100.967103 504.452068
1392126 0 6983800 0 340 assurance moto 65.284710 274.715290 -8.079895 3.938856 1.211769e+06 5.772031e+06 -1100.803739 504.377216
2632999 0 6983800 0 340 cher naître 65.284710 274.715290 -8.079895 3.938856 1.211769e+06 5.772031e+06 -1100.803739 504.377216
2656011 406142 10705354 183 4650 chien chat 928.002951 3904.997049 -24.455881 11.921959 1.927971e+06 9.183525e+06 -1096.012822 502.182066
7730101 465804 11207384 129 1235 marquer but 261.906895 1102.093105 -8.212475 4.003487 2.025431e+06 9.647757e+06 -1095.877573 502.120096
971511 4320 6874684 3 696 ami photo 134.217683 564.782317 -11.326285 5.521433 1.193586e+06 5.685418e+06 -1088.559235 498.766907
1527458 416 6823113 1 376 autoriser mathieu 72.389223 304.610777 -8.390652 4.090346 1.183960e+06 5.639569e+06 -1087.716955 498.380983
6119870 90535 7719186 58 2068 huile olive 408.221451 1717.778549 -17.333842 8.450048 1.355076e+06 6.454645e+06 -1086.303079 497.733159
9158479 0 6741951 0 334 partenaire assurance 64.132627 269.867373 -8.008285 3.903946 1.169805e+06 5.572146e+06 -1081.575386 495.566978
2835101 0 6654471 0 609 cliquer écouter 116.936436 492.063564 -10.813715 5.271561 1.154627e+06 5.499844e+06 -1074.535499 492.341372
12398573 21865 6861246 16 1103 top 50 214.863501 904.136499 -13.566685 6.613602 1.194298e+06 5.688813e+06 -1072.831993 491.560843
3706853 98996 7529539 60 3219 demain appartenir 629.613423 2649.386577 -22.700908 11.066430 1.323638e+06 6.304897e+06 -1064.448090 487.719423
11661354 0 6349761 0 427 soirée partage 81.989915 345.010085 -9.054828 4.414124 1.101756e+06 5.248005e+06 -1049.645598 480.937070
1993646 187415 8119280 73 2607 bon week 514.597126 2165.402874 -19.466710 9.489796 1.441306e+06 6.865389e+06 -1044.435759 478.549973
9059465 0 6257085 0 1123 papa chouch 215.631557 907.368443 -14.684398 7.158475 1.085676e+06 5.171409e+06 -1041.957552 477.414484
5695785 1434 6120021 3 955 google play 183.949271 774.050729 -13.341597 6.503874 1.062142e+06 5.059313e+06 -1029.211410 471.574330
9129742 0 5960298 0 195 parlement rire 37.442701 157.557299 -6.119044 2.982963 1.034180e+06 4.926118e+06 -1016.946220 465.954543
1172352 17192 6116512 13 970 app store 188.749617 794.250383 -12.792380 6.236137 1.064268e+06 5.069436e+06 -1014.968594 465.048415
531612 65470 6582243 30 3029 abonner vous 587.370376 2471.629624 -22.997883 11.211202 1.153454e+06 5.494259e+06 -1013.030120 464.160225
In [31]:
fig = px.bar(graph_interactions_nonmedia2, x="res_inter_nonmedia", y="bigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="cyan", opacity=0.75, textfont_size=12)
fig

trigrammes

In [3]:
media3 = pan.read_csv("france-trigrammes-media-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
nonmedia3 = pan.read_csv("france-trigrammes-nonmedia-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
In [4]:
media3
Out[4]:
trigramme interactions
0 monde jungle miroir 496340
1 jungle miroir filmer 496340
2 miroir filmer réaction 496340
3 filmer réaction animal 496340
4 réaction animal sauvage 496340
... ... ...
8373459 antetokounmpo ocks mvp 480
8373460 ocks mvp 34 480
8373461 nord littoral disponible 455
8373462 littoral disponible version 455
8373463 disponible version digital 455

8373464 rows × 2 columns

In [5]:
nonmedia3
Out[5]:
trigramme interactions
0 thevoice histoire rencontre 480966
1 histoire rencontre musical 480966
2 rencontre musical talent 480966
3 musical talent coach 480966
4 talent coach donne 480966
... ... ...
35617243 avril nouvelle privatisation 411
35617244 nouvelle privatisation route 411
35617245 privatisation route national 411
35617246 route national cliquer 411
35617247 national cliquer ici 411

35617248 rows × 2 columns

In [6]:
media3_table = pan.pivot_table(media3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
nonmedia3_table = pan.pivot_table(nonmedia3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
In [7]:
media3_table
Out[7]:
len sum
interactions interactions
trigramme
#12h45 dominique hutton 1 607
#a25 convoi tracteur 1 585
#accidentdelaroute lens sallaumines 1 574
#bonne idée photo 1 1583
#bretagne auray vannes 1 745
... ... ...
시i spect rabiot 1 2465
시다 equipe timeline 1 3330
어오에 louvier bleu 1 399
주내6 beta nîmes 1 2099
주지 আn8 ar 1 2338

5804983 rows × 2 columns

In [8]:
nonmedia3_table
Out[8]:
len sum
interactions interactions
trigramme
# photo from 1 5923
# timeline photo 1 516
#100kfollower iyi aksamlar 1 1300
#106cabrel ca drôle 1 5896
#11mai 11mai2020 deconfinement 1 713
... ... ...
𖡡𖡡𖡡 contact fb 1 1167
🛕histoir savoir cliquer 1 534
🛖 beaucoup cadeau 1 1294
🛖 elobi elobistory 1 896
􏰒􏰄􏰃􏰁􏰉dever affluer dire 1 904

23881075 rows × 2 columns

In [9]:
media3_table["type"] = "media"
nonmedia3_table["type"] = "non-media"
In [10]:
media3_table
Out[10]:
len sum type
interactions interactions
trigramme
#12h45 dominique hutton 1 607 media
#a25 convoi tracteur 1 585 media
#accidentdelaroute lens sallaumines 1 574 media
#bonne idée photo 1 1583 media
#bretagne auray vannes 1 745 media
... ... ... ...
시i spect rabiot 1 2465 media
시다 equipe timeline 1 3330 media
어오에 louvier bleu 1 399 media
주내6 beta nîmes 1 2099 media
주지 আn8 ar 1 2338 media

5804983 rows × 3 columns

In [11]:
nonmedia3_table
Out[11]:
len sum type
interactions interactions
trigramme
# photo from 1 5923 non-media
# timeline photo 1 516 non-media
#100kfollower iyi aksamlar 1 1300 non-media
#106cabrel ca drôle 1 5896 non-media
#11mai 11mai2020 deconfinement 1 713 non-media
... ... ... ...
𖡡𖡡𖡡 contact fb 1 1167 non-media
🛕histoir savoir cliquer 1 534 non-media
🛖 beaucoup cadeau 1 1294 non-media
🛖 elobi elobistory 1 896 non-media
􏰒􏰄􏰃􏰁􏰉dever affluer dire 1 904 non-media

23881075 rows × 3 columns

In [13]:
tableau3 = pan.concat([media3_table,nonmedia3_table], names = ["trigramme","nb","interactions","type"], levels=0)
In [14]:
tableau3
Out[14]:
len sum type
interactions interactions
trigramme
#12h45 dominique hutton 1 607 media
#a25 convoi tracteur 1 585 media
#accidentdelaroute lens sallaumines 1 574 media
#bonne idée photo 1 1583 media
#bretagne auray vannes 1 745 media
... ... ... ...
𖡡𖡡𖡡 contact fb 1 1167 non-media
🛕histoir savoir cliquer 1 534 non-media
🛖 beaucoup cadeau 1 1294 non-media
🛖 elobi elobistory 1 896 non-media
􏰒􏰄􏰃􏰁􏰉dever affluer dire 1 904 non-media

29686058 rows × 3 columns

In [15]:
tableau3["trigramme"] = tableau3.index
tableau3.columns = ["nb","interactions","media","trigramme"]
tableau3.reset_index(drop=True, inplace=True)
tableau3
Out[15]:
nb interactions media trigramme
0 1 607 media #12h45 dominique hutton
1 1 585 media #a25 convoi tracteur
2 1 574 media #accidentdelaroute lens sallaumines
3 1 1583 media #bonne idée photo
4 1 745 media #bretagne auray vannes
... ... ... ... ...
29686053 1 1167 non-media 𖡡𖡡𖡡 contact fb
29686054 1 534 non-media 🛕histoir savoir cliquer
29686055 1 1294 non-media 🛖 beaucoup cadeau
29686056 1 896 non-media 🛖 elobi elobistory
29686057 1 904 non-media 􏰒􏰄􏰃􏰁􏰉dever affluer dire

29686058 rows × 4 columns

In [16]:
khi2_3 = pan.pivot_table(tableau3,index=["trigramme"],columns=["media"],values=["trigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [17]:
khi2_3
Out[17]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
trigramme
# photo from 0 1 0 1 0 5923 0 1
# timeline photo 0 1 0 1 0 516 0 1
#100kfollower iyi aksamlar 0 1 0 1 0 1300 0 1
#106cabrel ca drôle 0 1 0 1 0 5896 0 1
#11mai 11mai2020 deconfinement 0 1 0 1 0 713 0 1
... ... ... ... ... ... ... ... ...
𖡡𖡡𖡡 contact fb 0 1 0 1 0 1167 0 1
🛕histoir savoir cliquer 0 1 0 1 0 534 0 1
🛖 beaucoup cadeau 0 1 0 1 0 1294 0 1
🛖 elobi elobistory 0 1 0 1 0 896 0 1
􏰒􏰄􏰃􏰁􏰉dever affluer dire 0 1 0 1 0 904 0 1

28777047 rows × 8 columns

In [18]:
khi2_3["trigramme"] = khi2_3.index
khi2_3.columns = khi2_3.columns.get_level_values(0)
khi2_3.reset_index(drop=True, inplace=True)
khi2_3.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","trigramme"]
khi2_3 = khi2_3.drop(columns=["a","b","c","d"])
khi2_3
Out[18]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme
0 0 5923 0 1 # photo from
1 0 516 0 1 # timeline photo
2 0 1300 0 1 #100kfollower iyi aksamlar
3 0 5896 0 1 #106cabrel ca drôle
4 0 713 0 1 #11mai 11mai2020 deconfinement
... ... ... ... ... ...
28777042 0 1167 0 1 𖡡𖡡𖡡 contact fb
28777043 0 534 0 1 🛕histoir savoir cliquer
28777044 0 1294 0 1 🛖 beaucoup cadeau
28777045 0 896 0 1 🛖 elobi elobistory
28777046 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer dire

28777047 rows × 5 columns

In [19]:
khi2_3.obs_media.sum(), khi2_3.obs_nonmedia.sum()
Out[19]:
(8373464, 35617248)
In [20]:
print("Nb de lignes fichier media3 = ", media3.shape[0])
print("Somme observée média khi2_3 = ", khi2_3.obs_media.sum())

print("Nb de lignes  fichier nonmedia3 = ", nonmedia3.shape[0])
print("Somme observée nonmédia khi2_3 = ", khi2_3.obs_nonmedia.sum())
Nb de lignes fichier media3 =  8373464
Somme observée média khi2_3 =  8373464
Nb de lignes  fichier nonmedia3 =  35617248
Somme observée nonmédia khi2_3 =  35617248
In [21]:
khi2_3["exp_media"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_media.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3["exp_nonmedia"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_nonmedia.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3
Out[21]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia
0 0 5923 0 1 # photo from 0.190346 0.809654
1 0 516 0 1 # timeline photo 0.190346 0.809654
2 0 1300 0 1 #100kfollower iyi aksamlar 0.190346 0.809654
3 0 5896 0 1 #106cabrel ca drôle 0.190346 0.809654
4 0 713 0 1 #11mai 11mai2020 deconfinement 0.190346 0.809654
... ... ... ... ... ... ... ...
28777042 0 1167 0 1 𖡡𖡡𖡡 contact fb 0.190346 0.809654
28777043 0 534 0 1 🛕histoir savoir cliquer 0.190346 0.809654
28777044 0 1294 0 1 🛖 beaucoup cadeau 0.190346 0.809654
28777045 0 896 0 1 🛖 elobi elobistory 0.190346 0.809654
28777046 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer dire 0.190346 0.809654

28777047 rows × 7 columns

In [22]:
khi2_3["res_media"] = (khi2_3.obs_media - khi2_3.exp_media) / np.sqrt(khi2_3.exp_media)
khi2_3["res_nonmedia"] = (khi2_3.obs_nonmedia - khi2_3.exp_nonmedia) / np.sqrt(khi2_3.exp_nonmedia)
khi2_3
Out[22]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 5923 0 1 # photo from 0.190346 0.809654 -0.436287 0.211541
1 0 516 0 1 # timeline photo 0.190346 0.809654 -0.436287 0.211541
2 0 1300 0 1 #100kfollower iyi aksamlar 0.190346 0.809654 -0.436287 0.211541
3 0 5896 0 1 #106cabrel ca drôle 0.190346 0.809654 -0.436287 0.211541
4 0 713 0 1 #11mai 11mai2020 deconfinement 0.190346 0.809654 -0.436287 0.211541
... ... ... ... ... ... ... ... ... ...
28777042 0 1167 0 1 𖡡𖡡𖡡 contact fb 0.190346 0.809654 -0.436287 0.211541
28777043 0 534 0 1 🛕histoir savoir cliquer 0.190346 0.809654 -0.436287 0.211541
28777044 0 1294 0 1 🛖 beaucoup cadeau 0.190346 0.809654 -0.436287 0.211541
28777045 0 896 0 1 🛖 elobi elobistory 0.190346 0.809654 -0.436287 0.211541
28777046 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer dire 0.190346 0.809654 -0.436287 0.211541

28777047 rows × 9 columns

In [23]:
graph_media3 = khi2_3.sort_values(by=["res_media"],ascending=False).head(50)
graph_media3
Out[23]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
9678661 15404479 69437 3121 8 equipe timeline photo 595.593198 2533.406802 103.480015 -50.174018
11835921 4908461 64729 936 58 gmt canal sport 189.204103 804.795897 54.292101 -26.324434
19970192 3300254 2455579 1976 1492 placer garde vue 660.120553 2807.879447 51.215861 -24.832868
1498679 1240458 722696 1170 379 agence régional santé 294.846233 1254.153767 50.966707 -24.712061
7697218 1317470 202890 812 152 dernier 24 heure 183.493718 780.506282 46.397975 -22.496835
20348812 2395920 3102477 1848 1798 port masque obligatoire 694.002174 2951.997826 43.805090 -21.239631
14884032 531691 22842 544 18 libre maine libre 106.974553 455.025447 42.253869 -20.487496
15577429 531691 22842 544 18 maine libre maine 106.974553 455.025447 42.253869 -20.487496
25726403 1804847 1683107 1124 689 tester positif covid-19 345.097625 1467.902375 41.928762 -20.329863
18228837 1183394 14104 516 6 observateur france 24 99.360706 422.639294 41.797749 -20.266338
14667309 1330015 641145 829 308 lancer appel témoin 216.423607 920.576393 41.639733 -20.189722
13008926 874056 2960 412 3 image amateur témoignage 78.993665 336.006335 37.467642 -18.166814
26566996 871360 2960 411 3 témoignage observateur contenu 78.803319 335.196681 37.421659 -18.144518
16297966 871360 2960 411 3 meilleur image amateur 78.803319 335.196681 37.421659 -18.144518
18228695 871360 2960 411 3 observateur contenu vérifier 78.803319 335.196681 37.421659 -18.144518
6615330 871360 2960 411 3 contenu vérifier expliquer 78.803319 335.196681 37.421659 -18.144518
25083925 870270 2960 411 3 suivre actualité international 78.803319 335.196681 37.421659 -18.144518
1923384 871360 2960 411 3 amateur témoignage observateur 78.803319 335.196681 37.421659 -18.144518
1298015 871360 2960 411 3 actualité international meilleur 78.803319 335.196681 37.421659 -18.144518
13522422 871360 2960 411 3 international meilleur image 78.803319 335.196681 37.421659 -18.144518
636263 975527 50078 437 30 24 heure france 88.891666 378.108334 36.921888 -17.902196
17143097 2678152 1183057 942 647 mort george floyd 302.460081 1286.539919 36.773386 -17.830192
23699530 1018271 713824 723 381 santé public france 210.142183 893.857817 35.378577 -17.153895
25726387 1730998 1170433 832 562 tester positif coronavirus 265.342575 1128.657425 34.787011 -16.867065
26708824 2158501 0 350 0 urgent timeline photo 66.621163 283.378837 34.718528 -16.833860
641985 2158501 0 350 0 24 urgent timeline 66.621163 283.378837 34.718528 -16.833860
17973979 801554 120921 461 111 nouveau cas coronavirus 108.878015 463.121985 33.746058 -16.362341
17973989 844036 154039 479 137 nouveau cas covid-19 117.253247 498.746753 33.407363 -16.198120
18666456 453507 97946 433 96 ouest courrier ouest 100.693129 428.306871 33.116116 -16.056904
7020051 453507 97946 433 96 courrier ouest courrier 100.693129 428.306871 33.116116 -16.056904
26767799 1345255 669310 706 478 vaccin contre covid-19 225.369878 958.630122 32.015704 -15.523350
24714326 164403 0 290 0 source agence ecofin 55.200392 234.799608 31.602853 -15.323172
11428414 231697 0 245 0 from roubaix web' 46.634814 198.365186 29.047605 -14.084218
22918698 231697 0 245 0 roubaix web' post 46.634814 198.365186 29.047605 -14.084218
19780742 231697 0 245 0 photo from roubaix 46.634814 198.365186 29.047605 -14.084218
18976 404873 92444 322 64 000 nouveau cas 73.473626 312.526374 28.993907 -14.058182
27277602 337926 10664 257 10 ville chinois wuhan 50.822430 216.177570 28.921026 -14.022844
19046291 322628 29487 270 23 partager facebook poster 55.771431 237.228569 28.686099 -13.908935
10184159 322628 29487 270 23 facebook poster twitter 55.771431 237.228569 28.686099 -13.908935
22627370 284400 0 233 0 retrouver actualité international 44.350660 188.649340 28.327304 -13.734968
16716584 1349688 957731 615 494 ministre santé olivier 211.093914 897.906086 27.799856 -13.479226
18136895 822362 920638 652 562 nuit samedi dimanche 231.080263 982.919737 27.689675 -13.425803
19152322 284119 2076 223 2 partir ville chinois 42.827891 182.172109 27.531142 -13.348935
8737496 513135 23336 241 16 décès 24 heure 48.918968 208.081032 27.462865 -13.315830
2529326 661308 143804 314 87 appel témoin disparition 76.328818 324.671182 27.203968 -13.190299
23696413 1390260 958854 612 519 santé olivier véran 215.281530 915.718470 27.038278 -13.109962
5823862 421293 0 207 0 club mediapart enaccèslibre 39.401659 167.598341 26.700074 -12.945978
14942351 1111501 642594 518 375 lieu public clore 169.979139 723.020861 26.693619 -12.942848
16074261 737709 483481 461 292 masque devenir obligatoire 143.330674 609.669326 26.534182 -12.865543
11215177 1374192 8943 214 7 france 24 urgent 42.066506 178.933494 26.508934 -12.853301
In [24]:
fig = px.bar(graph_media3, x="res_media", y="trigramme", labels={"res_media":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="DarkOliveGreen", opacity=0.75, textfont_size=12)
fig
In [25]:
graph_nonmedia3 = khi2_3.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia3
Out[25]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
24238182 0 2709873 0 4073 silhouette normal taille 775.279993 3297.720007 -27.843850 13.500557
26519865 8925 9948632 5 3397 twitter for iphone 647.557706 2754.442294 -25.250670 12.243210
1743245 0 1564644 0 2344 alert pet alert 446.171447 1897.828553 -21.122771 10.241729
20622958 1312 3876077 1 2332 premier chaîne réinformation 444.077639 1888.922361 -21.025696 10.194661
26512249 1312 3848902 1 2291 tvlibertés premier chaîne 436.273445 1855.726555 -20.839284 10.104276
19625757 0 1447664 0 2164 pet alert pet 411.909134 1752.090866 -20.295545 9.840635
11525472 0 4078455 0 2157 féminin 100 social 410.576711 1746.423289 -20.262693 9.824706
4199106 187415 8073410 73 2590 bon week end 506.891878 2156.108122 -19.271870 9.344289
25411780 0 1257252 0 1898 taille moyen poil 361.277050 1536.722950 -19.007289 9.216003
19779570 0 3748461 0 1805 photo from association 343.574856 1461.425144 -18.535772 8.987380
25412250 562 1212117 1 1769 taille petit poil 336.912739 1433.087261 -18.300702 8.873402
17909888 0 1132439 0 1747 normal taille moyen 332.534777 1414.465223 -18.235536 8.841805
19780857 0 1316621 0 1622 photo from spa 308.741505 1313.258495 -17.571042 8.519614
19780469 0 1980490 0 1568 photo from météo 298.462811 1269.537189 -17.276076 8.376595
17909890 0 1067039 0 1557 normal taille petit 296.369003 1260.630997 -17.215371 8.347161
8710105 0 3189137 0 1534 découvrir idée repas 291.991041 1242.008959 -17.087745 8.285279
12937239 0 3189137 0 1534 idée repas recette 291.991041 1242.008959 -17.087745 8.285279
22400978 0 3184196 0 1531 repas recette cuisine 291.420002 1239.579998 -17.071028 8.277174
21989262 0 3184196 0 1531 recette cuisine actuel 291.420002 1239.579998 -17.071028 8.277174
18769323 0 1976928 0 1495 paiement 3 partir 284.567540 1210.432460 -16.869130 8.179280
717103 0 1976928 0 1495 3 partir 150 284.567540 1210.432460 -16.869130 8.179280
20152741 0 979413 0 1445 poil court contact 275.050231 1169.949769 -16.584638 8.041340
4227241 0 2101382 0 1425 bonjour tonton jo 271.243307 1153.756693 -16.469466 7.985496
13697110 706525 6454281 494 4845 is re-share of 1016.258257 4322.741743 -16.382617 7.943386
21967341 706525 6454281 494 4845 re-share of post 1016.258257 4322.741743 -16.382617 7.943386
25787718 702232 6432827 492 4821 this is re-share 1011.309256 4301.690744 -16.329921 7.917835
16726939 0 1386926 0 1299 minodusud lookbymds découvrir 247.259688 1051.740312 -15.724493 7.624284
19781023 0 1263221 0 1230 photo from ville 234.125802 995.874198 -15.301170 7.419028
16278557 0 2502015 0 1209 media féminin 100 230.128532 978.871468 -15.169988 7.355423
28139172 0 2444360 0 1180 éclaireur media féminin 224.608493 955.391507 -14.986944 7.266671
11428607 0 870082 0 1173 from spa refuge 223.276070 949.723930 -14.942425 7.245085
15491295 0 4174753 0 1170 mag timeline photo 222.705031 947.294969 -14.923305 7.235814
28139205 0 2424620 0 1165 éclaireur éclaireur media 221.753300 943.246700 -14.891383 7.220337
9749069 0 1244943 0 1157 esprit spiritualité métaphysique 220.230531 936.769469 -14.840166 7.195503
12284620 0 2794779 0 1117 gérer sponsoriser facebook 212.616683 904.383317 -14.581381 7.070027
17260777 0 726453 0 1099 moyen poil court 209.190452 889.809548 -14.463418 7.012830
17044552 0 998875 0 1098 mont serein mont 209.000106 888.999894 -14.456836 7.009639
24093925 0 998223 0 1097 serein mont ventoux 208.809760 888.190240 -14.450251 7.006446
10004497 0 1727663 0 1062 exister bruno picard 202.147644 859.852356 -14.217864 6.893769
1910842 0 1329076 0 1057 always an amazing 201.195913 855.804087 -14.184355 6.877522
18935152 0 1329076 0 1057 paris always an 201.195913 855.804087 -14.184355 6.877522
2079766 0 1328175 0 1056 an amazing idea 201.005566 854.994434 -14.177643 6.874268
18983609 0 1326183 0 1054 parisweloveyou parisjetaim cityscape 200.624874 853.375126 -14.164211 6.867755
1925114 0 1326456 0 1054 amazing idea pari 200.624874 853.375126 -14.164211 6.867755
18983109 0 1325376 0 1053 parisjetaim cityscape villedeparis 200.434528 852.565472 -14.157490 6.864496
20055759 0 957679 0 1053 plateau albion mont 200.434528 852.565472 -14.157490 6.864496
18971809 0 1325714 0 1053 parisalwaysamazing parisweloveyou parisjetaim 200.434528 852.565472 -14.157490 6.864496
18971811 0 1325714 0 1053 parisalwaysanamazingidea parisalwaysamazing pa... 200.434528 852.565472 -14.157490 6.864496
1724825 0 954790 0 1051 albion mont serein 200.053836 850.946164 -14.144039 6.857974
18931394 0 1321429 0 1050 parigi париж 巴黎 199.863489 850.136511 -14.137308 6.854711
In [26]:
fig = px.bar(graph_nonmedia3, x="res_nonmedia", y="trigramme", labels={"res_nonmedia":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="Olive", opacity=0.75, textfont_size=12)
fig
In [27]:
khi2_3["exp_inter_media"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_media.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3["exp_inter_nonmedia"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_nonmedia.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3
Out[27]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 5923 0 1 # photo from 0.190346 0.809654 -0.436287 0.211541 1019.030077 4903.969923
1 0 516 0 1 # timeline photo 0.190346 0.809654 -0.436287 0.211541 88.775877 427.224123
2 0 1300 0 1 #100kfollower iyi aksamlar 0.190346 0.809654 -0.436287 0.211541 223.660155 1076.339845
3 0 5896 0 1 #106cabrel ca drôle 0.190346 0.809654 -0.436287 0.211541 1014.384828 4881.615172
4 0 713 0 1 #11mai 11mai2020 deconfinement 0.190346 0.809654 -0.436287 0.211541 122.668993 590.331007
... ... ... ... ... ... ... ... ... ... ... ...
28777042 0 1167 0 1 𖡡𖡡𖡡 contact fb 0.190346 0.809654 -0.436287 0.211541 200.778001 966.221999
28777043 0 534 0 1 🛕histoir savoir cliquer 0.190346 0.809654 -0.436287 0.211541 91.872710 442.127290
28777044 0 1294 0 1 🛖 beaucoup cadeau 0.190346 0.809654 -0.436287 0.211541 222.627878 1071.372122
28777045 0 896 0 1 🛖 elobi elobistory 0.190346 0.809654 -0.436287 0.211541 154.153461 741.846539
28777046 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer dire 0.190346 0.809654 -0.436287 0.211541 155.529831 748.470169

28777047 rows × 11 columns

In [28]:
khi2_3["res_inter_media"] = (khi2_3.interactions_media - khi2_3.exp_inter_media) / np.sqrt(khi2_3.exp_inter_media)
khi2_3["res_inter_nonmedia"] = (khi2_3.interactions_nonmedia - khi2_3.exp_inter_nonmedia) / np.sqrt(khi2_3.exp_inter_nonmedia)
khi2_3
Out[28]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 5923 0 1 # photo from 0.190346 0.809654 -0.436287 0.211541 1019.030077 4903.969923 -31.922250 14.551679
1 0 516 0 1 # timeline photo 0.190346 0.809654 -0.436287 0.211541 88.775877 427.224123 -9.422095 4.295039
2 0 1300 0 1 #100kfollower iyi aksamlar 0.190346 0.809654 -0.436287 0.211541 223.660155 1076.339845 -14.955272 6.817324
3 0 5896 0 1 #106cabrel ca drôle 0.190346 0.809654 -0.436287 0.211541 1014.384828 4881.615172 -31.849409 14.518474
4 0 713 0 1 #11mai 11mai2020 deconfinement 0.190346 0.809654 -0.436287 0.211541 122.668993 590.331007 -11.075603 5.048786
... ... ... ... ... ... ... ... ... ... ... ... ... ...
28777042 0 1167 0 1 𖡡𖡡𖡡 contact fb 0.190346 0.809654 -0.436287 0.211541 200.778001 966.221999 -14.169615 6.459184
28777043 0 534 0 1 🛕histoir savoir cliquer 0.190346 0.809654 -0.436287 0.211541 91.872710 442.127290 -9.585025 4.369310
28777044 0 1294 0 1 🛖 beaucoup cadeau 0.190346 0.809654 -0.436287 0.211541 222.627878 1071.372122 -14.920720 6.801573
28777045 0 896 0 1 🛖 elobi elobistory 0.190346 0.809654 -0.436287 0.211541 154.153461 741.846539 -12.415855 5.659737
28777046 0 904 0 1 􏰒􏰄􏰃􏰁􏰉dever affluer dire 0.190346 0.809654 -0.436287 0.211541 155.529831 748.470169 -12.471160 5.684947

28777047 rows × 13 columns

In [29]:
graph_interactions_media3 = khi2_3.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media3
Out[29]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
9678661 15404479 69437 3121 8 equipe timeline photo 595.593198 2533.406802 103.480015 -50.174018 2.662230e+06 1.281169e+07 7809.502160 -3559.942240
11835921 4908461 64729 936 58 gmt canal sport 189.204103 804.795897 54.292101 -26.324434 8.556188e+05 4.117571e+06 4381.466879 -1997.280838
23233417 2859114 45700 56 37 rémy buisine direct 17.702195 75.297805 9.102492 -4.413496 4.997624e+05 2.405052e+06 3337.419985 -1521.354644
4471168 2649766 210436 241 104 brut nature fr 65.669432 279.330568 21.635952 -10.490554 4.920871e+05 2.368115e+06 3075.854783 -1402.120794
26708824 2158501 0 350 0 urgent timeline photo 66.621163 283.378837 34.718528 -16.833860 3.713621e+05 1.787139e+06 2932.645426 -1336.839163
641985 2158501 0 350 0 24 urgent timeline 66.621163 283.378837 34.718528 -16.833860 3.713621e+05 1.787139e+06 2932.645426 -1336.839163
17143097 2678152 1183057 942 647 mort george floyd 302.460081 1286.539919 36.773386 -17.830192 6.643066e+05 3.196902e+06 2470.824127 -1126.319066
11215177 1374192 8943 214 7 france 24 urgent 42.066506 178.933494 26.508934 -12.853301 2.379632e+05 1.145172e+06 2329.221896 -1061.770039
19970192 3300254 2455579 1976 1492 placer garde vue 660.120553 2807.879447 51.215861 -24.832868 9.902696e+05 4.765563e+06 2321.305585 -1058.161409
18228837 1183394 14104 516 6 observateur france 24 99.360706 422.639294 41.797749 -20.266338 2.060251e+05 9.914729e+05 2153.270033 -981.562818
7697218 1317470 202890 812 152 dernier 24 heure 183.493718 780.506282 46.397975 -22.496835 2.615723e+05 1.258788e+06 2064.552841 -941.121305
5112087 1264411 272357 30 15 chaine canal sport 8.565578 36.434422 7.323749 -3.551042 2.643952e+05 1.272373e+06 1944.822943 -886.542728
4519800 1123609 145476 85 54 bus agresser bayonne 26.458119 112.541881 11.381177 -5.518354 2.183413e+05 1.050744e+06 1937.354131 -883.138088
5325111 1330337 356226 141 110 chauffeur bus agresser 47.776891 203.223109 13.486978 -6.539387 2.901669e+05 1.396396e+06 1930.991915 -880.237887
1747017 962921 26568 184 18 alerte info coronavirus 38.449928 163.550072 23.472787 -11.381174 1.702379e+05 8.192511e+05 1921.195150 -875.772057
636263 975527 50078 437 30 24 heure france 88.891666 378.108334 36.921888 -17.902196 1.764515e+05 8.491535e+05 1902.282908 -867.150958
13008926 874056 2960 412 3 image amateur témoignage 78.993665 336.006335 37.467642 -18.166814 1.508873e+05 7.261287e+05 1861.715039 -848.658195
16297966 871360 2960 411 3 meilleur image amateur 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
26566996 871360 2960 411 3 témoignage observateur contenu 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
1923384 871360 2960 411 3 amateur témoignage observateur 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
1298015 871360 2960 411 3 actualité international meilleur 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
13522422 871360 2960 411 3 international meilleur image 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
6615330 871360 2960 411 3 contenu vérifier expliquer 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
18228695 871360 2960 411 3 observateur contenu vérifier 78.803319 335.196681 37.421659 -18.144518 1.504235e+05 7.238965e+05 1858.827863 -847.342083
25083925 870270 2960 411 3 suivre actualité international 78.803319 335.196681 37.421659 -18.144518 1.502360e+05 7.229940e+05 1857.659297 -846.809395
16139327 910731 38278 106 14 match timeline photo 22.841542 97.158458 17.399779 -8.436574 1.632735e+05 7.857355e+05 1849.816827 -843.234424
21686697 864974 17244 17 8 question très simple 4.758655 20.241345 5.611605 -2.720880 1.517823e+05 7.304357e+05 1830.609351 -834.478744
7317477 832217 2614 41 2 crêpe voyage terre 8.184886 34.815114 11.470111 -5.561475 1.436296e+05 6.912014e+05 1816.925736 -828.241102
5753228 810555 2184 121 1 classement timeline photo 23.222234 98.777766 20.290283 -9.838084 1.398287e+05 6.729103e+05 1793.688982 -817.648685
23607497 808926 3207 83 6 samedi 16h konbini 16.940810 72.059190 16.049672 -7.781952 1.397245e+05 6.724085e+05 1790.279012 -816.094259
389608 810710 5661 95 5 19h55 gmt canal 19.034618 80.965382 17.411799 -8.442402 1.404536e+05 6.759174e+05 1788.440754 -815.256294
11206542 784309 0 136 0 franc 24 urgent 25.887081 110.112919 21.641967 -10.493470 1.349374e+05 6.493716e+05 1767.775170 -805.835938
25726387 1730998 1170433 832 562 tester positif coronavirus 265.342575 1128.657425 34.787011 -16.867065 4.991804e+05 2.402251e+06 1743.482730 -794.762289
27742744 765830 2614 41 2 voyage terre bretagne 8.184886 34.815114 11.470111 -5.561475 1.322079e+05 6.362361e+05 1742.615415 -794.366926
415878 781998 22003 2 1 20 octobre chaine 0.571039 2.428961 1.890984 -0.916875 1.383254e+05 6.656756e+05 1730.669377 -788.921354
19129584 803774 44563 3 3 partir 20 octobre 1.142077 4.857923 1.738521 -0.842951 1.459532e+05 7.023838e+05 1721.871546 -784.910885
14667309 1330015 641145 829 308 lancer appel témoin 216.423607 920.576393 41.639733 -20.189722 3.391307e+05 1.632029e+06 1701.529016 -775.637793
26767799 1345255 669310 706 478 vaccin contre covid-19 225.369878 958.630122 32.015704 -15.523350 3.465984e+05 1.667967e+06 1696.300926 -773.254581
3061817 891984 146068 10 25 attention image sensible 6.662116 28.337884 1.293198 -0.627029 1.785930e+05 8.594590e+05 1688.089427 -769.511389
15649124 750346 31151 83 9 make home great 17.511849 74.488151 15.649366 -7.587857 1.344536e+05 6.470434e+05 1679.648938 -765.663813
12612318 750346 31151 83 9 home great again 17.511849 74.488151 15.649366 -7.587857 1.344536e+05 6.470434e+05 1679.648938 -765.663813
26378213 703310 10778 4 5 truc savoir clitoris 1.713116 7.286884 1.747233 -0.847175 1.228562e+05 5.912318e+05 1656.033706 -754.898868
26441522 682812 0 4 0 très simple coronavirus 0.761385 3.238615 3.711567 -1.799615 1.174753e+05 5.653367e+05 1649.430431 -751.888782
128721 679942 0 2 0 11 question très 0.380692 1.619308 2.624474 -1.272520 1.169815e+05 5.629605e+05 1645.960332 -750.306946
26314269 672166 3233 2 1 trop personne sous-estimer 0.571039 2.428961 1.890984 -0.916875 1.161999e+05 5.591991e+05 1630.968167 -743.472804
23233447 670463 4848 27 5 rémy buisine question 6.091078 25.908922 8.471973 -4.107778 1.161847e+05 5.591263e+05 1626.122644 -741.263984
23261093 670463 4848 27 5 répondre rémy buisine 6.091078 25.908922 8.471973 -4.107778 1.161847e+05 5.591263e+05 1626.122644 -741.263984
17973989 844036 154039 479 137 nouveau cas covid-19 117.253247 498.746753 33.407363 -16.198120 1.717151e+05 8.263599e+05 1622.454023 -739.591653
12661300 660515 1390 6 1 homme reconnaissance mammifère 1.332423 5.667577 4.043620 -1.960617 1.138783e+05 5.480267e+05 1619.863163 -738.410615
27955970 660515 1390 6 1 xavier installer caméra 1.332423 5.667577 4.043620 -1.960617 1.138783e+05 5.480267e+05 1619.863163 -738.410615
In [30]:
fig = px.bar(graph_interactions_media3, x="res_inter_media", y="trigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="green", opacity=0.75, textfont_size=12)
fig
In [31]:
graph_interactions_nonmedia3 = khi2_3.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia3
Out[31]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
3510668 0 14513153 0 505 bar mojito whisky 96.124821 408.875179 -9.804327 4.753792 2.496934e+06 1.201622e+07 -1580.168943 720.316104
22915319 0 14102900 0 484 rosé ptdr comique 92.127551 391.872449 -9.598310 4.653901 2.426351e+06 1.167655e+07 -1557.674994 710.062293
26519865 8925 9948632 5 3397 twitter for iphone 647.557706 2754.442294 -25.250670 12.243210 1.713161e+06 8.244396e+06 -1302.058781 593.540274
28198200 0 8839078 0 666 écouter rire chanson 126.770556 539.229444 -11.259243 5.459233 1.520730e+06 7.318348e+06 -1233.178993 562.141593
4488134 0 6987431 0 342 brûler gomme instagram 65.098394 276.901606 -8.068358 3.912078 1.202161e+06 5.785270e+06 -1096.431240 499.805468
17640761 0 6987431 0 342 naître brûler gomme 65.098394 276.901606 -8.068358 3.912078 1.202161e+06 5.785270e+06 -1096.431240 499.805468
11852804 0 6987431 0 342 gomme instagram snapchat 65.098394 276.901606 -8.068358 3.912078 1.202161e+06 5.785270e+06 -1096.431240 499.805468
5382295 0 6983800 0 340 cher naître brûler 64.717701 275.282299 -8.044731 3.900623 1.201537e+06 5.782263e+06 -1096.146324 499.675590
17200569 0 6983800 0 340 moto cher naître 64.717701 275.282299 -8.044731 3.900623 1.201537e+06 5.782263e+06 -1096.146324 499.675590
2939965 0 6983800 0 340 assurance moto cher 64.717701 275.282299 -8.044731 3.900623 1.201537e+06 5.782263e+06 -1096.146324 499.675590
19037026 416 6820219 1 374 partage autoriser mathieu 71.379818 303.620182 -8.330296 4.039083 1.173465e+06 5.647170e+06 -1082.881795 493.628987
3212615 416 6815114 1 374 autoriser mathieu rivrin 71.379818 303.620182 -8.330296 4.039083 1.172587e+06 5.642943e+06 -1082.476182 493.444090
19063633 0 6741951 0 334 partenaire assurance moto 63.575624 270.424376 -7.973432 3.866052 1.159928e+06 5.582023e+06 -1076.999324 490.947478
5801143 0 6648038 0 607 cliquer écouter rire 115.540131 491.459869 -10.748960 5.211813 1.143770e+06 5.504268e+06 -1069.471908 487.516124
24500726 0 6338439 0 423 soirée partage autoriser 80.516434 342.483566 -8.973095 4.350755 1.090505e+06 5.247934e+06 -1044.272382 476.028982
4199106 187415 8073410 73 2590 bon week end 506.891878 2156.108122 -19.271870 9.344289 1.421244e+06 6.839581e+06 -1034.953131 471.780824
11000046 86513 6565868 47 676 fondation 30 million 137.620288 585.379712 -7.724746 3.745472 1.144517e+06 5.507864e+06 -988.954396 450.812415
6801680 0 5601949 0 923 corona corona corona 175.689524 747.310476 -13.254793 6.426808 9.637944e+05 4.638155e+06 -981.730334 447.519344
4192958 0 5553935 0 373 bon soirée partage 70.999125 302.000875 -8.426098 4.085535 9.555338e+05 4.598401e+06 -977.514102 445.597385
20653670 137407 6365478 8 1046 prendre bien soin 200.624874 853.375126 -13.599407 6.593900 1.118797e+06 5.384088e+06 -927.824880 422.946676
26956352 0 4770056 0 996 venir raconter vdm 189.584796 806.415204 -13.768979 6.676120 8.206704e+05 3.949386e+06 -905.908582 412.956186
12901319 0 4727218 0 987 ici télécharger appli 187.871680 799.128320 -13.706629 6.645889 8.133002e+05 3.913918e+06 -901.831603 411.097705
21495727 0 4617491 0 292 punchline hésiter insta 55.581085 236.418915 -7.455272 3.614814 7.944221e+05 3.823069e+06 -891.303606 406.298544
26542259 0 4536558 0 613 téléchargeable app store 116.682209 496.317791 -10.801954 5.237508 7.804979e+05 3.756060e+06 -883.457921 402.722108
11859747 0 4521121 0 612 google play téléchargeable 116.491862 495.508138 -10.793140 5.233234 7.778420e+05 3.743279e+06 -881.953524 402.036332
20064845 0 4521121 0 612 play téléchargeable app 116.491862 495.508138 -10.793140 5.233234 7.778420e+05 3.743279e+06 -881.953524 402.036332
8140281 0 4493332 0 608 disponible google play 115.730478 492.269522 -10.757810 5.216104 7.730610e+05 3.720271e+06 -879.238890 400.798873
26868862 0 4451289 0 569 vdm vdm disponible 108.306977 460.693023 -10.407064 5.046039 7.658277e+05 3.685461e+06 -875.115812 398.919378
26868082 0 4401860 0 561 vdm disponible google 106.784207 454.215793 -10.333644 5.010441 7.573236e+05 3.644536e+06 -870.243419 396.698310
26543016 0 4323019 0 893 télécharger appli grato 169.979139 723.020861 -13.037605 6.321501 7.437593e+05 3.579260e+06 -862.414812 393.129659
2561368 0 4323019 0 893 appli grato android 169.979139 723.020861 -13.037605 6.321501 7.437593e+05 3.579260e+06 -862.414812 393.129659
12021342 0 4323019 0 893 grato android io 169.979139 723.020861 -13.037605 6.321501 7.437593e+05 3.579260e+06 -862.414812 393.129659
21770680 0 4251519 0 872 raconter vdm ici 165.981869 706.018131 -12.883395 6.246730 7.314580e+05 3.520061e+06 -855.253179 389.865046
2223494 0 4206300 0 616 android io vdm 117.253247 498.746753 -10.828354 5.250309 7.236782e+05 3.482622e+06 -850.692800 387.786208
15491295 0 4174753 0 1170 mag timeline photo 222.705031 947.294969 -14.923305 7.235814 7.182507e+05 3.456502e+06 -847.496723 386.329285
24496055 0 4165401 0 391 soirée doux nuit 74.425357 316.574643 -8.627013 4.182952 7.166417e+05 3.448759e+06 -846.546938 385.896328
26868267 0 4153227 0 854 vdm ici télécharger 162.555638 691.444362 -12.749731 6.181921 7.145472e+05 3.438680e+06 -845.308954 385.331996
16747530 574 4141304 1 740 minute temps cuisson 141.046520 599.953480 -11.792099 5.717597 7.125947e+05 3.429283e+06 -843.473258 384.495198
8400753 0 4133495 0 310 doux nuit ami 59.007316 250.992684 -7.681622 3.724563 7.111524e+05 3.422343e+06 -843.298530 384.415549
11525472 0 4078455 0 2157 féminin 100 social 410.576711 1746.423289 -20.262693 9.824706 7.016830e+05 3.376772e+06 -837.665198 381.847608
730511 486513 8304564 87 1080 30 million ami 222.133992 944.866008 -9.066864 4.396221 1.512472e+06 7.278605e+06 -834.231038 380.282155
3730261 0 3974868 0 323 bel soirée doux 61.481816 261.518184 -7.841034 3.801857 6.838612e+05 3.291007e+06 -826.959023 376.967225
14851869 0 3970497 0 48 lfauit bataille jury 9.136617 38.863383 -3.022684 1.465599 6.831092e+05 3.287388e+06 -826.504212 376.759900
22786536 0 3957088 0 477 rire cliquer écouter 90.795128 386.204872 -9.528648 4.620124 6.808022e+05 3.276286e+06 -825.107413 376.123173
2488608 0 3935978 0 142 aperotime aperitif biere 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
12776994 0 3935978 0 142 humour alcool citation 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
12781071 0 3935978 0 142 humourdemerde bar mojito 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
5687409 0 3935978 0 142 citation lol apero 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
16269738 0 3935978 0 142 mdr humour alcool 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
8465111 0 3935978 0 142 drole aperotime aperitif 27.029158 114.970842 -5.198957 2.520802 6.771703e+05 3.258808e+06 -822.903607 375.118574
In [32]:
fig = px.bar(graph_interactions_nonmedia3, x="res_inter_nonmedia", y="trigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightgreen", opacity=0.75, textfont_size=12)
fig

In English

In [6]:
pays = "France"
In [76]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "media"
source = graph_interactions_media1[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [78]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "non-media"
source = graph_interactions_nonmedia1[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [32]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "media"
source = graph_interactions_media2[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [33]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "non-media"
source = graph_interactions_nonmedia2[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [74]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "media"
source = graph_interactions_media3[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [75]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "non-media"
source = graph_interactions_nonmedia3[:10]
xx = "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [ ]: