In [1]:
import pandas as pan
import numpy as np
import plotly.express as px
pays = "au Canada francophone"

Canada francophone

mots seuls

In [2]:
media1 = pan.read_csv("canada-motsSeuls-media-nettoye.csv", low_memory=False, names=["mot","interactions"])
nonmedia1 = pan.read_csv("canada-motsSeuls-nonmedia-nettoye.csv", low_memory=False, names=["mot","interactions"])
In [3]:
media1
Out[3]:
mot interactions
0 recueillir 249663
1 endroit 249663
2 frère 249663
3 tuer 249663
4 terrence 249663
... ... ...
3904926 tomate 363
3904927 maskinongé 145
3904928 demeure 145
3904929 zone 145
3904930 orange 145

3904931 rows × 2 columns

In [4]:
nonmedia1
Out[4]:
mot interactions
0 guinéen 197962
1 incroyable 197962
2 talent 197962
3 guinéen 197962
4 incroyable 197962
... ... ...
10455865 przede 149
10455866 wszystkim 149
10455867 by 149
10455868 cię 149
10455869 kochać 149

10455870 rows × 2 columns

In [5]:
media1_table = pan.pivot_table(media1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
nonmedia1_table = pan.pivot_table(nonmedia1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
In [6]:
media1_table
Out[6]:
len sum
interactions interactions
mot
#achatlocal 1 325
#cavabienaller 1 334
#jourdelaterrechezsoi 1 247
#revuedepress 1 184
' 34 5546
... ... ...
西國' 1 2140
黑瑞金包装 1 390
대니 2 476
프블 1 666
화대 1 279

60046 rows × 2 columns

In [7]:
nonmedia1_table
Out[7]:
len sum
interactions interactions
mot
#1 1 292
#6 1 838
#achatlocal 1 1217
#acq 1 391
#amen 1 273
... ... ...
et 1 2311
les 1 151
nouvelle 1 181
trips 1 151
wψ 1 243

205652 rows × 2 columns

In [8]:
media1_table["type"] = "media"
nonmedia1_table["type"] = "non-media"
In [9]:
media1_table
Out[9]:
len sum type
interactions interactions
mot
#achatlocal 1 325 media
#cavabienaller 1 334 media
#jourdelaterrechezsoi 1 247 media
#revuedepress 1 184 media
' 34 5546 media
... ... ... ...
西國' 1 2140 media
黑瑞金包装 1 390 media
대니 2 476 media
프블 1 666 media
화대 1 279 media

60046 rows × 3 columns

In [10]:
nonmedia1_table
Out[10]:
len sum type
interactions interactions
mot
#1 1 292 non-media
#6 1 838 non-media
#achatlocal 1 1217 non-media
#acq 1 391 non-media
#amen 1 273 non-media
... ... ... ...
et 1 2311 non-media
les 1 151 non-media
nouvelle 1 181 non-media
trips 1 151 non-media
wψ 1 243 non-media

205652 rows × 3 columns

In [11]:
tableau1 = pan.concat([media1_table,nonmedia1_table], names = ["mot","nb","interactions","type"], levels=0)
In [12]:
tableau1
Out[12]:
len sum type
interactions interactions
mot
#achatlocal 1 325 media
#cavabienaller 1 334 media
#jourdelaterrechezsoi 1 247 media
#revuedepress 1 184 media
' 34 5546 media
... ... ... ...
et 1 2311 non-media
les 1 151 non-media
nouvelle 1 181 non-media
trips 1 151 non-media
wψ 1 243 non-media

265698 rows × 3 columns

In [13]:
tableau1["mot"] = tableau1.index
tableau1.columns = ["nb","interactions","media","mot"]
tableau1.reset_index(drop=True, inplace=True)
tableau1
Out[13]:
nb interactions media mot
0 1 325 media #achatlocal
1 1 334 media #cavabienaller
2 1 247 media #jourdelaterrechezsoi
3 1 184 media #revuedepress
4 34 5546 media '
... ... ... ... ...
265693 1 2311 non-media et
265694 1 151 non-media les
265695 1 181 non-media nouvelle
265696 1 151 non-media trips
265697 1 243 non-media wψ

265698 rows × 4 columns

In [14]:
tableau1.mot.value_counts()
Out[14]:
récipient     2
shut          2
contant       2
fpss          2
amputé        2
             ..
5910          1
lavraievie    1
loyalty       1
jvdb          1
antidater     1
Name: mot, Length: 221209, dtype: int64
In [15]:
khi2_1 = pan.pivot_table(tableau1,index=["mot"],columns=["media"],values=["mot","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [16]:
khi2_1
Out[16]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
mot
#1 0 1 0 1 0 292 0 1
#6 0 1 0 1 0 838 0 1
#achatlocal 1 1 1 1 325 1217 1 1
#acq 0 1 0 1 0 391 0 1
#amen 0 1 0 1 0 273 0 1
... ... ... ... ... ... ... ... ...
et 0 1 0 1 0 2311 0 1
les 0 1 0 1 0 151 0 1
nouvelle 0 1 0 1 0 181 0 1
trips 0 1 0 1 0 151 0 1
wψ 0 1 0 1 0 243 0 1

221209 rows × 8 columns

In [17]:
khi2_1["mot"] = khi2_1.index
khi2_1.columns = khi2_1.columns.get_level_values(0)
khi2_1.reset_index(drop=True, inplace=True)
khi2_1.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","mot"]
khi2_1 = khi2_1.drop(columns=["a","b","c","d"])
khi2_1
Out[17]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot
0 0 292 0 1 #1
1 0 838 0 1 #6
2 325 1217 1 1 #achatlocal
3 0 391 0 1 #acq
4 0 273 0 1 #amen
... ... ... ... ... ...
221204 0 2311 0 1 et
221205 0 151 0 1 les
221206 0 181 0 1 nouvelle
221207 0 151 0 1 trips
221208 0 243 0 1 wψ

221209 rows × 5 columns

In [18]:
m = khi2_1.mot == "franchement"
khi2_1[m]
Out[18]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot
84973 14792 468711 38 436 franchement
In [19]:
khi2_1.obs_media.sum(), khi2_1.obs_nonmedia.sum()
Out[19]:
(3904931, 10455525)
In [20]:
print("Nb de lignes fichier media1 = ", media1.shape[0])
print("Somme observée média khi2_1 = ", khi2_1.obs_media.sum())

print("Nb de lignes  fichier nonmedia1 = ", nonmedia1.shape[0])
print("Somme observée nonmédia khi2_1 = ", khi2_1.obs_nonmedia.sum())
Nb de lignes fichier media1 =  3904931
Somme observée média khi2_1 =  3904931
Nb de lignes  fichier nonmedia1 =  10455870
Somme observée nonmédia khi2_1 =  10455525
In [21]:
khi2_1["exp_media"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_media.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1["exp_nonmedia"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_nonmedia.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1
Out[21]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia
0 0 292 0 1 #1 0.271922 0.728078
1 0 838 0 1 #6 0.271922 0.728078
2 325 1217 1 1 #achatlocal 0.543845 1.456155
3 0 391 0 1 #acq 0.271922 0.728078
4 0 273 0 1 #amen 0.271922 0.728078
... ... ... ... ... ... ... ...
221204 0 2311 0 1 et 0.271922 0.728078
221205 0 151 0 1 les 0.271922 0.728078
221206 0 181 0 1 nouvelle 0.271922 0.728078
221207 0 151 0 1 trips 0.271922 0.728078
221208 0 243 0 1 wψ 0.271922 0.728078

221209 rows × 7 columns

In [22]:
khi2_1["res_media"] = (khi2_1.obs_media - khi2_1.exp_media) / np.sqrt(khi2_1.exp_media)
khi2_1["res_nonmedia"] = (khi2_1.obs_nonmedia - khi2_1.exp_nonmedia) / np.sqrt(khi2_1.exp_nonmedia)
khi2_1
Out[22]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
0 0 292 0 1 #1 0.271922 0.728078 -0.521462 0.318681
1 0 838 0 1 #6 0.271922 0.728078 -0.521462 0.318681
2 325 1217 1 1 #achatlocal 0.543845 1.456155 0.618550 -0.378015
3 0 391 0 1 #acq 0.271922 0.728078 -0.521462 0.318681
4 0 273 0 1 #amen 0.271922 0.728078 -0.521462 0.318681
... ... ... ... ... ... ... ... ... ...
221204 0 2311 0 1 et 0.271922 0.728078 -0.521462 0.318681
221205 0 151 0 1 les 0.271922 0.728078 -0.521462 0.318681
221206 0 181 0 1 nouvelle 0.271922 0.728078 -0.521462 0.318681
221207 0 151 0 1 trips 0.271922 0.728078 -0.521462 0.318681
221208 0 243 0 1 wψ 0.271922 0.728078 -0.521462 0.318681

221209 rows × 9 columns

In [23]:
moyen = (khi2_1.res_media > -1) & (khi2_1.res_nonmedia > -1) & (khi2_1.res_media < 1) & (khi2_1.res_nonmedia < 1) & (khi2_1.obs_media > 1000) & (khi2_1.obs_nonmedia > 1000)
# moyen = (graph_media1.res_media > -2) & (graph_media1.res_media < 2)
# moyen = (khi2_1.res_media < -2)
khi2_1[moyen]
Out[23]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
1584 3052809 9511044 4569 12149 10 4546.000242 12171.999758 0.341121 -0.208469
4443 894785 3061921 1404 3877 19 1436.022687 3844.977313 -0.845040 0.516429
5810 1196741 2804926 1722 4464 21 1682.112543 4503.887457 0.972543 -0.594350
17617 1597274 5116455 2389 6537 agir 2427.180175 6498.819825 -0.774974 0.473610
39891 909217 3509056 1548 4210 but 1565.729716 4192.270284 -0.448067 0.273828
50823 1124772 2537057 1384 3619 compagnie 1360.428234 3642.571766 0.639079 -0.390560
76654 707732 2904490 1160 3054 espoir 1145.881387 3068.118613 0.417083 -0.254892
86666 590746 1969953 1112 2950 février 1104.549168 2957.450832 0.224188 -0.137008
118547 532192 1874246 1092 2958 libéral 1101.286098 2948.713902 -0.279823 0.171008
119397 804645 2302373 1192 3194 liste 1192.652055 3193.347945 -0.018881 0.011539
120214 1716033 5365513 2300 6060 long 2273.272044 6086.727956 0.560583 -0.342589
135513 758016 3965940 1288 3466 média 1292.719533 3461.280467 -0.131264 0.080220
139662 1479065 5525449 1648 4458 noir 1660.358744 4445.641256 -0.303301 0.185356
142732 908609 3617667 1735 4670 octobre 1741.663569 4663.336431 -0.159671 0.097579
148771 1517553 5077659 2440 6624 partie 2464.705479 6599.294521 -0.497635 0.304120
149623 515933 3832208 1063 2751 paul 1037.112389 2776.887611 0.803857 -0.491262
150659 2120576 7975719 2982 7951 perdre 2972.928619 7960.071381 0.166372 -0.101675
153316 2325644 7985739 3931 10354 place 3884.412816 10400.587184 0.747487 -0.456812
160453 1216537 3821756 1886 5199 prêt 1926.570865 5158.429135 -0.924319 0.564879
164682 741200 2500648 1162 3192 rapidement 1183.950536 3170.049464 -0.637938 0.389863
166102 871436 3293066 1419 3673 reconnaître 1384.629336 3707.370664 0.923679 -0.564488
176073 814683 2940026 1240 3204 salle 1208.423560 3235.576440 0.908351 -0.555121
183350 956245 2624276 1316 3466 soirée 1300.333363 3481.666637 0.434459 -0.265511
185231 1212679 2915116 1415 3745 spectacle 1403.120065 3756.879935 0.317151 -0.193821
197278 808746 3082105 1249 3461 train 1280.754943 3429.245057 -0.887316 0.542266
197792 2654389 10125451 4414 12037 travail 4473.396937 11977.603063 -0.888066 0.542724
204818 1257209 4020813 1579 4222 victoire 1577.422383 4223.577617 0.039722 -0.024275
206398 1277195 3107965 1681 4402 voiture 1654.104527 4428.895473 0.661299 -0.404140
215699 2404023 7699291 3932 10417 équipe 3901.815856 10447.184144 0.483221 -0.295311
In [24]:
graph_media1 = khi2_1.sort_values(by=["res_media"],ascending=False).head(50)
graph_media1
Out[24]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
55017 27296939 16912105 40484 20569 covid-19 16601.683982 44451.316018 185.353320 -113.275015
163231 30915315 33104859 43106 43947 québec 23671.668807 63381.331193 126.315068 -77.194956
43052 10447357 7822238 19543 13609 cas 9014.774497 24137.225503 110.886262 -67.765946
198926 6725261 1255218 9602 2804 trump 3373.470451 9032.529549 107.237597 -65.536136
53946 11194453 6089840 15904 9807 coronavirus 6991.399224 18719.600776 106.591458 -65.141261
147586 9513465 8528549 14476 9887 pandémie 6624.847704 17738.152296 96.459599 -58.949376
21339 5204919 1960414 7304 3169 américain 2847.844272 7625.155728 83.503031 -51.031226
65597 3970694 782475 5759 1743 donald 2039.962545 5462.037455 82.341629 -50.321458
173210 4615773 4560203 9712 6996 région 4543.281018 12164.718982 76.682828 -46.863193
215948 4374429 1754918 5860 2515 états-unis 2277.350881 6097.649119 75.073956 -45.879963
47391 2598283 642783 4401 1122 chronique 1501.827930 4021.172070 74.810732 -45.719099
68625 4260668 2378931 6659 3862 décès 2860.896552 7660.103448 71.009379 -43.395977
132959 11089173 9783321 14437 15602 montréal 8168.279775 21870.720225 69.360719 -42.388431
116951 6841569 6291377 9049 7456 legault 4488.080751 12016.919249 68.080393 -41.605985
214208 5673604 8144655 8727 7726 école 4473.940782 11979.059218 63.585240 -38.858862
47453 3248344 2585287 4763 2614 chsld 2005.972233 5371.027767 61.557175 -37.619450
34431 2027471 932441 3795 1569 bilan 1458.592254 3905.407746 61.176086 -37.386555
145737 2396808 1640262 4704 2593 ottawa 1984.218433 5312.781567 61.057538 -37.314106
130761 7273716 10170695 10637 11248 ministre 5951.023765 15933.976235 60.744119 -37.122567
34106 2121888 506683 2776 692 biden 943.027207 2524.972793 59.688946 -36.477718
205336 5027200 5664726 8875 8869 ville 4824.992721 12919.007279 58.305225 -35.632085
125059 3297016 2814953 5561 4003 mardi 2600.666726 6963.333274 58.049470 -35.475785
160868 7401078 10055039 10967 12593 public 6406.493941 17153.506059 56.977426 -34.820627
180642 1806500 521000 3016 1107 sherbrooke 1121.136440 3001.863560 56.591175 -34.584577
22297 6313451 8655313 9040 9451 annoncer 5028.118823 13462.881177 56.577701 -34.576343
140948 10697607 20944773 19471 28669 nouveau 13090.348826 35049.651174 55.768555 -34.081849
207966 1120127 446454 2137 398 washington 689.323520 1845.676480 55.139138 -33.697193
44301 3906192 4249018 7531 7592 centre 4112.283866 11010.716134 53.311573 -32.580313
174730 1703408 939600 3417 1843 saguenay 1430.312315 3829.687685 52.530818 -32.103170
176299 4051350 4324717 5983 5440 samedi 3106.170641 8316.829359 51.618033 -31.545340
214113 1470479 556066 2531 955 éclosion 947.921812 2538.078188 51.418110 -31.423161
176772 9103052 17195543 14215 19889 santé 9273.644710 24830.355290 51.312226 -31.358452
190467 1598615 690826 2528 962 sûreté 949.009502 2540.990498 51.255944 -31.324056
159419 2607293 1766647 3890 2559 province 1753.628159 4695.371841 51.016198 -31.177540
51882 2254806 1531256 4021 2782 confirmer 1849.888722 4953.111278 50.478830 -30.849139
128734 2861746 3093231 5043 4201 mercredi 2513.651528 6730.348472 50.449415 -30.831162
77172 968133 354289 2236 730 estrie 806.522115 2159.477885 50.334910 -30.761185
94908 1510921 651174 2679 1241 hausse 1065.936174 2854.063826 49.406712 -30.193935
154729 4773481 4813915 5817 5520 policier 3082.785306 8254.214694 49.244867 -30.095027
73451 3115314 2716606 4622 3741 employé 2274.087811 6088.912189 49.235487 -30.089294
15340 1629599 631091 2480 1067 accuser 964.509084 2582.490916 48.797793 -29.821806
202729 2623872 2262222 3675 2509 vaccin 1681.568698 4502.431302 48.612053 -29.708295
107786 1797201 548363 2243 842 joe 838.880892 2246.119108 48.479016 -29.626992
87803 1226797 568231 2627 1276 gatineau 1061.313491 2841.686509 48.059912 -29.370864
81722 4063406 3767447 4817 4203 fermer 2452.740889 6567.259111 47.738551 -29.174471
198519 1259754 737242 2774 1467 trois-rivières 1153.223294 3087.776706 47.727258 -29.167570
146264 779880 380524 2026 672 outaouais 733.646887 1964.353113 47.713102 -29.158919
114165 2184596 2021085 4172 3298 lac 2031.261025 5438.738975 47.498604 -29.027832
47599 375175 37707 1257 61 châteauguay 358.393846 959.606154 47.466708 -29.008340
21386 17205263 31874299 20487 33739 an 14745.269120 39480.730880 47.284247 -28.896833
In [25]:
fig = px.bar(graph_media1, x="res_media", y="mot", labels={"res_media":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="red", opacity=0.75, textfont_size=12)
fig
In [26]:
graph_nonmedia1 = khi2_1.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia1
Out[26]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
151993 3408499 35491334 3734 40718 photo 12087.498671 32364.501329 -75.980157 46.433770
63120 213416 30624447 362 22871 dieu 6317.575286 16915.424714 -74.928753 45.791225
216431 2049969 29343236 2718 31019 être 9173.849155 24563.150845 -67.402744 41.191854
36388 3654909 38886326 5090 41228 bon 12594.906043 33723.093957 -66.872536 40.867828
193947 562582 24280117 678 20160 the 5666.320915 15171.679085 -66.267991 40.498372
165786 678651 14680038 1373 20995 recette 6082.362329 16285.637671 -60.384568 36.902834
21088 623864 17017660 817 17882 amour 5084.678702 13614.321298 -59.849422 36.575790
80102 9796398 65682702 14234 71080 faire 23198.795591 62115.204409 -58.858294 35.970082
195564 124789 18813824 188 13833 to 3812.625278 10208.374722 -58.701751 35.874414
85716 393247 15422503 817 17157 from 4887.534894 13086.465106 -58.224604 35.582815
103997 1029476 7927713 1347 18597 intérieur 5423.222206 14520.777794 -55.351469 33.826955
109316 237407 15850581 137 12069 jésus 3319.085953 8886.914047 -55.233512 33.754868
21622 154322 19777582 175 11950 and 3297.060231 8827.939769 -54.372317 33.228566
204993 5275628 44358848 6514 39880 vie 12615.572153 33778.427847 -54.323567 33.198773
29161 2037312 23127330 2892 24520 avoir 7453.939385 19958.060615 -52.839242 32.291658
155764 873038 14638629 1674 18088 post 5373.732312 14388.267688 -50.469882 30.843670
177503 2203280 24643627 3261 24385 savoir 7517.569249 20128.430751 -49.093160 30.002313
194922 179836 10638058 283 10423 timeline 2911.202213 7794.797787 -48.710500 29.768459
18300 745996 16738599 1303 15456 aimer 4557.149065 12201.850935 -48.204860 29.459447
151372 3703201 25678564 4341 28165 petit 8839.112566 23666.887434 -47.843835 29.238814
47161 1227751 16494771 1782 16579 chose 4992.768899 13368.231101 -45.439999 27.769757
18288 382176 10620450 453 10205 aime 2898.149933 7759.850067 -45.419785 27.757403
178904 24441 10032154 29 7494 seigneur 2045.672917 5477.327083 -44.587936 27.249035
206794 2463296 20510622 4678 27239 vous 8678.950218 23238.049782 -42.946675 26.246011
100831 333357 9261872 469 9019 in 2580.000616 6907.999384 -41.560279 25.398742
70444 1614049 8720363 2618 18497 détail 5741.643445 15373.356555 -41.223356 25.192839
143030 583293 9193004 1048 11151 of 3317.182495 8881.817505 -39.398959 24.077894
20710 3491 10596101 4 5696 amen 1549.958212 4150.041788 -39.267907 23.997805
84084 39625 8932792 96 6111 for 1687.822916 4519.177084 -38.746396 23.679094
47296 14136 6572662 39 5666 christ 1551.317824 4153.682176 -38.396592 23.465318
146013 486303 8678715 755 9459 oui 2777.416346 7436.583654 -38.375149 23.452213
206355 4736319 29281273 6057 30116 voir 9836.252349 26336.747651 -38.105798 23.287605
79892 456429 6773977 534 8085 facile 2343.699969 6275.300031 -37.381401 22.844904
32683 1295379 11363695 1712 13339 bel 4092.705446 10958.294554 -37.213492 22.742289
153009 125411 10222671 159 5897 pis 1646.762619 4409.237381 -36.662168 22.405359
124002 590285 10679703 473 7546 maman 2180.546474 5838.453526 -36.567044 22.347226
86985 1061777 10850204 1572 12427 gagner 3806.642983 10192.357017 -36.219052 22.134557
206949 948316 8260777 1246 10776 vraiment 3269.052214 8752.947786 -35.383123 21.623696
127963 1331223 13433618 2134 14386 meilleur 4492.159589 12027.840411 -35.184032 21.502025
206786 4518194 30909432 6813 31197 vouloir 10335.773969 27674.226031 -34.650801 21.176152
33570 1584366 15392669 2007 13563 besoin 4233.833220 11336.166780 -34.223216 20.914842
106068 2200820 19816445 2981 17148 jamais 5473.527867 14655.472133 -33.690415 20.589230
211827 68840 9247479 85 4671 you 1293.263378 3462.736622 -33.598363 20.532975
49627 629506 8519367 928 8724 coeur 2624.595905 7027.404095 -33.116710 20.238623
20835 946437 12212437 1284 10196 ami 3121.670223 8358.329777 -32.890772 20.100545
146172 3256 6798256 8 3943 our 1074.365771 2876.634229 -32.533450 19.882174
145866 0 2542031 0 3769 ouattara 1024.875877 2744.124123 -32.013683 19.564529
148267 689524 6242653 697 7367 parfait 2192.782986 5871.217014 -31.942637 19.521111
69185 98267 3159325 205 4929 délicieux 1396.050080 3737.949920 -31.877153 19.481091
63754 1255428 12088743 1837 12111 dire 3792.774936 10155.225064 -31.757064 19.407701
In [27]:
fig = px.bar(graph_nonmedia1, x="res_nonmedia", y="mot", labels={"res_nonmedia":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="darkorange", opacity=0.75, textfont_size=12)
fig
In [28]:
khi2_1["exp_inter_media"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_media.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1["exp_inter_nonmedia"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_nonmedia.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1
Out[28]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 292 0 1 #1 0.271922 0.728078 -0.521462 0.318681 65.203619 226.796381
1 0 838 0 1 #6 0.271922 0.728078 -0.521462 0.318681 187.125455 650.874545
2 325 1217 1 1 #achatlocal 0.543845 1.456155 0.618550 -0.378015 344.328701 1197.671299
3 0 391 0 1 #acq 0.271922 0.728078 -0.521462 0.318681 87.310326 303.689674
4 0 273 0 1 #amen 0.271922 0.728078 -0.521462 0.318681 60.960918 212.039082
... ... ... ... ... ... ... ... ... ... ... ...
221204 0 2311 0 1 et 0.271922 0.728078 -0.521462 0.318681 516.046452 1794.953548
221205 0 151 0 1 les 0.271922 0.728078 -0.521462 0.318681 33.718310 117.281690
221206 0 181 0 1 nouvelle 0.271922 0.728078 -0.521462 0.318681 40.417312 140.582688
221207 0 151 0 1 trips 0.271922 0.728078 -0.521462 0.318681 33.718310 117.281690
221208 0 243 0 1 wψ 0.271922 0.728078 -0.521462 0.318681 54.261916 188.738084

221209 rows × 11 columns

In [29]:
khi2_1["res_inter_media"] = (khi2_1.interactions_media - khi2_1.exp_inter_media) / np.sqrt(khi2_1.exp_inter_media)
khi2_1["res_inter_nonmedia"] = (khi2_1.interactions_nonmedia - khi2_1.exp_inter_nonmedia) / np.sqrt(khi2_1.exp_inter_nonmedia)
khi2_1
Out[29]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 292 0 1 #1 0.271922 0.728078 -0.521462 0.318681 65.203619 226.796381 -8.074876 4.329658
1 0 838 0 1 #6 0.271922 0.728078 -0.521462 0.318681 187.125455 650.874545 -13.679381 7.334732
2 325 1217 1 1 #achatlocal 0.543845 1.456155 0.618550 -0.378015 344.328701 1197.671299 -1.041636 0.558514
3 0 391 0 1 #acq 0.271922 0.728078 -0.521462 0.318681 87.310326 303.689674 -9.343999 5.010148
4 0 273 0 1 #amen 0.271922 0.728078 -0.521462 0.318681 60.960918 212.039082 -7.807747 4.186427
... ... ... ... ... ... ... ... ... ... ... ... ... ...
221204 0 2311 0 1 et 0.271922 0.728078 -0.521462 0.318681 516.046452 1794.953548 -22.716656 12.180418
221205 0 151 0 1 les 0.271922 0.728078 -0.521462 0.318681 33.718310 117.281690 -5.806747 3.113513
221206 0 181 0 1 nouvelle 0.271922 0.728078 -0.521462 0.318681 40.417312 140.582688 -6.357461 3.408800
221207 0 151 0 1 trips 0.271922 0.728078 -0.521462 0.318681 33.718310 117.281690 -5.806747 3.113513
221208 0 243 0 1 wψ 0.271922 0.728078 -0.521462 0.318681 54.261916 188.738084 -7.366269 3.949711

221209 rows × 13 columns

In [30]:
graph_interactions_media1 = khi2_1.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media1
Out[30]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
55017 27296939 16912105 40484 20569 covid-19 16601.683982 44451.316018 185.353320 -113.275015 9.871882e+06 3.433716e+07 5545.927779 -2973.664723
163231 30915315 33104859 43106 43947 québec 23671.668807 63381.331193 126.315068 -77.194956 1.429571e+07 4.972446e+07 4395.597038 -2356.870188
53946 11194453 6089840 15904 9807 coronavirus 6991.399224 18719.600776 106.591458 -65.141261 3.859584e+06 1.342471e+07 3733.551453 -2001.888717
198926 6725261 1255218 9602 2804 trump 3373.470451 9032.529549 107.237597 -65.536136 1.782041e+06 6.198438e+06 3702.976807 -1985.494933
43052 10447357 7822238 19543 13609 cas 9014.774497 24137.225503 110.886262 -67.765946 4.079602e+06 1.418999e+07 3152.662467 -1690.422511
132959 11089173 9783321 14437 15602 montréal 8168.279775 21870.720225 69.360719 -42.388431 4.660829e+06 1.621166e+07 2977.607794 -1596.560144
21339 5204919 1960414 7304 3169 américain 2847.844272 7625.155728 83.503031 -51.031226 1.600019e+06 5.565314e+06 2849.906210 -1528.087977
65597 3970694 782475 5759 1743 donald 2039.962545 5462.037455 82.341629 -50.321458 1.061383e+06 3.691786e+06 2823.931145 -1514.160437
147586 9513465 8528549 14476 9887 pandémie 6624.847704 17738.152296 96.459599 -58.949376 4.028783e+06 1.401323e+07 2732.527407 -1465.150770
215948 4374429 1754918 5860 2515 états-unis 2277.350881 6097.649119 75.073956 -45.879963 1.368684e+06 4.760663e+06 2569.216269 -1377.585156
116951 6841569 6291377 9049 7456 legault 4488.080751 12016.919249 68.080393 -41.605985 2.932588e+06 1.020036e+07 2282.643493 -1223.928024
68625 4260668 2378931 6659 3862 décès 2860.896552 7660.103448 71.009379 -43.395977 1.482623e+06 5.156976e+06 2281.518226 -1223.324669
47391 2598283 642783 4401 1122 chronique 1501.827930 4021.172070 74.810732 -45.719099 7.237303e+05 2.517336e+06 2203.480965 -1181.481958
95894 2897206 970374 1862 1658 heur 957.167176 2562.832824 29.246537 -17.873443 8.636309e+05 3.003949e+06 2188.246005 -1173.313143
126096 7146483 8175026 7343 10350 masque 4811.124673 12881.875327 36.502198 -22.307596 3.421294e+06 1.190022e+07 2013.970572 -1079.868596
34106 2121888 506683 2776 692 biden 943.027207 2524.972793 59.688946 -36.477718 5.869601e+05 2.041611e+06 2003.473975 -1074.240438
83494 2489352 912886 1081 1013 floyd 569.405701 1524.594299 21.439507 -13.102331 7.597200e+05 2.642518e+06 1984.389629 -1064.007624
21386 17205263 31874299 20487 33739 an 14745.269120 39480.730880 47.284247 -28.896833 1.095947e+07 3.812009e+07 1886.656616 -1011.604270
82484 1866064 441121 1057 855 fillette 519.915807 1392.084193 23.554607 -14.394932 5.151946e+05 1.791990e+06 1882.035198 -1009.126317
61484 7819216 10602060 9418 13029 dernier 6103.844206 16343.155794 42.420052 -25.924176 4.113472e+06 1.430780e+07 1827.137023 -979.690527
154729 4773481 4813915 5817 5520 policier 3082.785306 8254.214694 49.244867 -30.095027 2.140866e+06 7.446530e+06 1799.254376 -964.740162
173210 4615773 4560203 9712 6996 région 4543.281018 12164.718982 76.682828 -46.863193 2.048996e+06 7.126980e+06 1793.153929 -961.469171
180642 1806500 521000 3016 1107 sherbrooke 1121.136440 3001.863560 56.591175 -34.584577 5.197309e+05 1.807769e+06 1784.889572 -957.037915
160868 7401078 10055039 10967 12593 public 6406.493941 17153.506059 56.977426 -34.820627 3.897952e+06 1.355816e+07 1774.342692 -951.382795
107786 1797201 548363 2243 842 joe 838.880892 2246.119108 48.479016 -29.626992 5.237646e+05 1.821799e+06 1759.580710 -943.467585
81722 4063406 3767447 4817 4203 fermer 2452.740889 6567.259111 47.738551 -29.174471 1.748630e+06 6.082223e+06 1750.491523 -938.594064
133332 5329565 6047038 6454 7054 mort 3673.129039 9834.870961 45.884156 -28.041194 2.540396e+06 8.836207e+06 1749.943683 -938.300318
155386 2836370 1899969 3138 2725 port 1594.281578 4268.718422 38.662112 -23.627585 1.057625e+06 3.678714e+06 1729.608950 -927.397061
130761 7273716 10170695 10637 11248 ministre 5951.023765 15933.976235 60.744119 -37.122567 3.895338e+06 1.354907e+07 1711.731506 -917.811374
205336 5027200 5664726 8875 8869 ville 4824.992721 12919.007279 58.305225 -35.632085 2.387508e+06 8.304418e+06 1708.365906 -916.006777
47453 3248344 2585287 4763 2614 chsld 2005.972233 5371.027767 61.557175 -37.619450 1.302650e+06 4.530981e+06 1704.749594 -914.067751
81732 2153757 1056370 2912 1698 fermeture 1253.562694 3356.437306 46.840986 -28.625942 7.168216e+05 2.493305e+06 1697.194911 -910.017013
34431 2027471 932441 3795 1569 bilan 1458.592254 3905.407746 61.176086 -37.386555 6.609485e+05 2.298963e+06 1680.865440 -901.261333
125059 3297016 2814953 5561 4003 mardi 2600.666726 6963.333274 58.049470 -35.475785 1.364803e+06 4.747166e+06 1653.940894 -886.824691
41790 6726145 9373530 10095 13219 canadien 6339.601008 16974.398992 47.165574 -28.824307 3.595058e+06 1.250462e+07 1651.361245 -885.441512
159419 2607293 1766647 3890 2559 province 1753.628159 4695.371841 51.016198 -31.177540 9.767011e+05 3.397239e+06 1649.925929 -884.671912
22297 6313451 8655313 9040 9451 annoncer 5028.118823 13462.881177 56.577701 -34.576343 3.342526e+06 1.162624e+07 1625.003478 -871.308771
115145 3940355 4086105 4386 5275 lancer 2627.043207 7033.956793 34.317968 -20.972747 1.792309e+06 6.234151e+06 1604.490390 -860.309882
176299 4051350 4324717 5983 5440 samedi 3106.170641 8316.829359 51.618033 -31.545340 1.870376e+06 6.505691e+06 1594.725467 -855.074039
73451 3115314 2716606 4622 3741 employé 2274.087811 6088.912189 49.235487 -30.089294 1.302268e+06 4.529652e+06 1588.761073 -851.875997
15340 1629599 631091 2480 1067 accuser 964.509084 2582.490916 48.797793 -29.821806 5.048122e+05 1.755878e+06 1583.088774 -848.834574
145737 2396808 1640262 4704 2593 ottawa 1984.218433 5312.781567 61.057538 -37.314106 9.014780e+05 3.135592e+06 1574.923569 -844.456482
88593 1941793 1021205 1154 1204 george 641.193239 1716.806761 20.251597 -12.376363 6.616376e+05 2.301360e+06 1573.810983 -843.859925
44301 3906192 4249018 7531 7592 centre 4112.283866 11010.716134 53.311573 -32.580313 1.821059e+06 6.334151e+06 1545.154023 -828.494382
169331 4801954 5971356 5025 7100 retrouver 3297.060231 8827.939769 30.092977 -18.390728 2.405681e+06 8.367629e+06 1544.960291 -828.390506
85209 4832754 6077182 6384 6881 françois 3607.051873 9657.948127 46.237205 -28.256953 2.436189e+06 8.473747e+06 1535.442682 -823.287269
51882 2254806 1531256 4021 2782 confirmer 1849.888722 4953.111278 50.478830 -30.849139 8.454279e+05 2.940634e+06 1532.813211 -821.877376
190467 1598615 690826 2528 962 sûreté 949.009502 2540.990498 51.255944 -31.324056 5.112323e+05 1.778209e+06 1520.804034 -815.438189
164877 2391778 1762986 2795 2324 rassemblement 1391.971243 3727.028757 37.605506 -22.981861 9.277591e+05 3.227005e+06 1519.949245 -814.979861
214113 1470479 556066 2531 955 éclosion 947.921812 2538.078188 51.418110 -31.423161 4.525276e+05 1.574017e+06 1513.228386 -811.376211
In [31]:
fig = px.bar(graph_interactions_media1, x="res_inter_media", y="mot", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="gold", opacity=0.75, textfont_size=12)
fig
In [32]:
graph_interactions_nonmedia1 = khi2_1.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia1
Out[32]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
63120 213416 30624447 362 22871 dieu 6317.575286 16915.424714 -74.928753 45.791225 6.886097e+06 2.395177e+07 -2542.809290 1363.426028
193947 562582 24280117 678 20160 the 5666.320915 15171.679085 -66.267991 40.498372 5.547376e+06 1.929532e+07 -2116.427661 1134.804946
21622 154322 19777582 175 11950 and 3297.060231 8827.939769 -54.372317 33.228566 4.450795e+06 1.548111e+07 -2036.541741 1091.971005
195564 124789 18813824 188 13833 to 3812.625278 10208.374722 -58.701751 35.874414 4.228994e+06 1.470962e+07 -1995.769975 1070.109639
36388 3654909 38886326 5090 41228 bon 12594.906043 33723.093957 -66.872536 40.867828 9.499461e+06 3.304177e+07 -1896.276760 1016.762484
216431 2049969 29343236 2718 31019 être 9173.849155 24563.150845 -67.402744 41.191854 7.010105e+06 2.438310e+07 -1873.403416 1004.498052
151993 3408499 35491334 3734 40718 photo 12087.498671 32364.501329 -75.980157 46.433770 8.686335e+06 3.021350e+07 -1790.760964 960.186089
109316 237407 15850581 137 12069 jésus 3319.085953 8886.914047 -55.233512 33.754868 3.592449e+06 1.249554e+07 -1770.119719 949.118483
204993 5275628 44358848 6514 39880 vie 12615.572153 33778.427847 -54.323567 33.198773 1.108338e+07 3.855109e+07 -1744.504313 935.383788
80102 9796398 65682702 14234 71080 faire 23198.795591 62115.204409 -58.858294 35.970082 1.685449e+07 5.862461e+07 -1719.211901 921.822278
21088 623864 17017660 817 17882 amour 5084.678702 13614.321298 -59.849422 36.575790 3.939353e+06 1.370217e+07 -1670.456525 895.680189
85716 393247 15422503 817 17157 from 4887.534894 13086.465106 -58.224604 35.582815 3.531658e+06 1.228409e+07 -1670.015484 895.443708
18300 745996 16738599 1303 15456 aimer 4557.149065 12201.850935 -48.204860 29.459447 3.904311e+06 1.358028e+07 -1598.391856 857.039915
177503 2203280 24643627 3261 24385 savoir 7517.569249 20128.430751 -49.093160 30.002313 5.994916e+06 2.085199e+07 -1548.585164 830.334122
20710 3491 10596101 4 5696 amen 1549.958212 4150.041788 -39.267907 23.997805 2.366890e+06 8.232702e+06 -1536.200748 823.693736
29161 2037312 23127330 2892 24520 avoir 7453.939385 19958.060615 -52.839242 32.291658 5.619266e+06 1.954538e+07 -1511.054839 810.210780
165786 678651 14680038 1373 20995 recette 6082.362329 16285.637671 -60.384568 36.902834 3.429596e+06 1.192909e+07 -1485.458250 796.486174
178904 24441 10032154 29 7494 seigneur 2045.672917 5477.327083 -44.587936 27.249035 2.245638e+06 7.810957e+06 -1482.235586 794.758217
194922 179836 10638058 283 10423 timeline 2911.202213 7794.797787 -48.710500 29.768459 2.415636e+06 8.402258e+06 -1438.524461 771.320798
153009 125411 10222671 159 5897 pis 1646.762619 4409.237381 -36.662168 22.405359 2.310727e+06 8.037355e+06 -1437.606298 770.828490
208203 190866 10160401 410 4952 we 1458.048409 3903.951591 -27.447031 16.773710 2.311439e+06 8.039828e+06 -1394.800084 747.876274
211827 68840 9247479 85 4671 you 1293.263378 3462.736622 -33.598363 20.532975 2.080335e+06 7.235984e+06 -1394.608410 747.773501
155764 873038 14638629 1674 18088 post 5373.732312 14388.267688 -50.469882 30.843670 3.463756e+06 1.204791e+07 -1392.023362 746.387427
84084 39625 8932792 96 6111 for 1687.822916 4519.177084 -38.746396 23.679094 2.003541e+06 6.968876e+06 -1387.470716 743.946349
47161 1227751 16494771 1782 16579 chose 4992.768899 13368.231101 -45.439999 27.769757 3.957440e+06 1.376508e+07 -1372.164041 735.739080
18288 382176 10620450 453 10205 aime 2898.149933 7759.850067 -45.419785 27.757403 2.456887e+06 8.545739e+06 -1323.625213 709.713101
51536 642322 12050531 1096 9001 concours 2745.601415 7351.398585 -31.481852 19.239511 2.834315e+06 9.858538e+06 -1302.012321 698.124509
139727 1022434 13779514 1534 10205 nom 3192.098148 8546.901852 -29.347568 17.935186 3.305276e+06 1.149667e+07 -1255.659781 673.270793
100831 333357 9261872 469 9019 in 2580.000616 6907.999384 -41.560279 25.398742 2.142615e+06 7.452614e+06 -1236.028452 662.744693
146172 3256 6798256 8 3943 our 1074.365771 2876.634229 -32.533450 19.882174 1.518778e+06 5.282734e+06 -1229.745118 659.375640
106068 2200820 19816445 2981 17148 jamais 5473.527867 14655.472133 -33.690415 20.589230 4.916457e+06 1.710081e+07 -1224.744681 656.694461
124002 590285 10679703 473 7546 maman 2180.546474 5838.453526 -36.567044 22.347226 2.516589e+06 8.753399e+06 -1214.279569 651.083184
206786 4518194 30909432 6813 31197 vouloir 10335.773969 27674.226031 -34.650801 21.176152 7.910991e+06 2.751663e+07 -1206.264234 646.785451
47296 14136 6572662 39 5666 christ 1551.317824 4153.682176 -38.396592 23.465318 1.470832e+06 5.115966e+06 -1201.122927 644.028739
206794 2463296 20510622 4678 27239 vous 8678.950218 23238.049782 -42.946675 26.246011 5.130077e+06 1.784384e+07 -1177.403868 631.310844
20835 946437 12212437 1284 10196 ami 3121.670223 8358.329777 -32.890772 20.100545 2.938377e+06 1.022050e+07 -1162.043956 623.075030
33570 1584366 15392669 2007 13563 besoin 4233.833220 11336.166780 -34.223216 20.914842 3.790973e+06 1.318606e+07 -1133.312424 607.669503
166816 615602 9880749 633 5712 regarder 1725.348220 4619.651780 -26.298000 16.071503 2.343836e+06 8.152515e+06 -1128.856956 605.280530
151372 3703201 25678564 4341 28165 petit 8839.112566 23666.887434 -47.843835 29.238814 6.560950e+06 2.282081e+07 -1115.682763 598.216675
119524 101251 6210016 132 4002 live 1124.127587 3009.872413 -29.591006 18.083958 1.409306e+06 4.901961e+06 -1101.852381 590.800978
146013 486303 8678715 755 9459 oui 2777.416346 7436.583654 -38.375149 23.452213 2.046549e+06 7.118469e+06 -1090.641515 584.789836
32967 109721 6155709 199 3878 ben 1108.628005 2968.371995 -27.319384 16.695701 1.399071e+06 4.866359e+06 -1090.061333 584.478749
143030 583293 9193004 1048 11151 of 3317.182495 8881.817505 -39.398959 24.077894 2.183048e+06 7.593249e+06 -1082.734045 580.549939
127963 1331223 13433618 2134 14386 meilleur 4492.159589 12027.840411 -35.184032 21.502025 3.296990e+06 1.146785e+07 -1082.612954 580.485012
147049 742770 9783489 1225 8006 page 2510.116535 6720.883465 -25.650484 15.675786 2.350514e+06 8.175745e+06 -1048.662008 562.280892
206355 4736319 29281273 6057 30116 voir 9836.252349 26336.747651 -38.105798 23.287605 7.596131e+06 2.642146e+07 -1037.626853 556.363965
165323 39246 5064512 77 1060 re 309.175875 827.824125 -13.204267 8.069526 1.139669e+06 3.964089e+06 -1030.790465 552.698370
102199 52843 5046279 90 3091 ingrédient 864.985451 2316.014549 -26.350517 16.103598 1.138634e+06 3.960488e+06 -1017.546399 545.597049
158062 14034 4611788 29 3375 prière 925.624167 2478.375833 -29.470880 18.010545 1.032946e+06 3.592876e+06 -1002.531310 537.546125
63754 1255428 12088743 1837 12111 dire 3792.774936 10155.225064 -31.757064 19.407701 2.979754e+06 1.036442e+07 -998.916570 535.607942
In [33]:
fig = px.bar(graph_interactions_nonmedia1, x="res_inter_nonmedia", y="mot", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightyellow", opacity=0.75, textfont_size=12)
fig

bigrammes

In [34]:
media2 = pan.read_csv("canada-bigrammes-media-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
nonmedia2 = pan.read_csv("canada-bigrammes-nonmedia-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
In [35]:
media2
Out[35]:
bigramme interactions
0 recueillir endroit 249663
1 endroit frère 249663
2 frère tuer 249663
3 tuer terrence 249663
4 terrence floyd 249663
... ... ...
3761083 snapie risotto 363
3761084 risotto tomate 363
3761085 maskinongé demeure 145
3761086 demeure zone 145
3761087 zone orange 145

3761088 rows × 2 columns

In [36]:
nonmedia2
Out[36]:
bigramme interactions
0 guinéen incroyable 197962
1 incroyable talent 197962
2 talent guinéen 197962
3 guinéen incroyable 197962
4 incroyable talent 197962
... ... ...
10126893 ale przede 149
10126894 przede wszystkim 149
10126895 wszystkim by 149
10126896 by cię 149
10126897 cię kochać 149

10126898 rows × 2 columns

In [37]:
media2_table = pan.pivot_table(media2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
nonmedia2_table = pan.pivot_table(nonmedia2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
In [38]:
media2_table
Out[38]:
len sum
interactions interactions
bigramme
#achatlocal bâton 1 325
#cavabienaller joignez 1 334
#jourdelaterrechezsoi jourdelaterre 1 247
#revuedepress presse 1 184
' ' 10 1470
... ... ...
瑞金日 fedex 1 236
蜜ய this 1 254
黑瑞金包装 gpackaging 1 390
프블 radio 1 666
화대 cinéma 1 279

1233484 rows × 2 columns

In [39]:
nonmedia2_table
Out[39]:
len sum
interactions interactions
bigramme
#6 canada 1 838
#achatlocal timeline 1 1217
#arteta propos 1 120
#assnat covid19qc 1 145
#aujourdhui souvenir 2 430
... ... ...
concours soirée 1 425
et 〰 1 2311
les trips 1 151
nouvelle collection 1 181
trips boostes 1 151

3509063 rows × 2 columns

In [40]:
media2_table["type"] = "media"
nonmedia2_table["type"] = "non-media"
In [41]:
media2_table
Out[41]:
len sum type
interactions interactions
bigramme
#achatlocal bâton 1 325 media
#cavabienaller joignez 1 334 media
#jourdelaterrechezsoi jourdelaterre 1 247 media
#revuedepress presse 1 184 media
' ' 10 1470 media
... ... ... ...
瑞金日 fedex 1 236 media
蜜ய this 1 254 media
黑瑞金包装 gpackaging 1 390 media
프블 radio 1 666 media
화대 cinéma 1 279 media

1233484 rows × 3 columns

In [42]:
nonmedia2_table
Out[42]:
len sum type
interactions interactions
bigramme
#6 canada 1 838 non-media
#achatlocal timeline 1 1217 non-media
#arteta propos 1 120 non-media
#assnat covid19qc 1 145 non-media
#aujourdhui souvenir 2 430 non-media
... ... ... ...
concours soirée 1 425 non-media
et 〰 1 2311 non-media
les trips 1 151 non-media
nouvelle collection 1 181 non-media
trips boostes 1 151 non-media

3509063 rows × 3 columns

In [43]:
tableau2 = pan.concat([media2_table,nonmedia2_table], names = ["bigramme","nb","interactions","type"], levels=0)
In [44]:
tableau2
Out[44]:
len sum type
interactions interactions
bigramme
#achatlocal bâton 1 325 media
#cavabienaller joignez 1 334 media
#jourdelaterrechezsoi jourdelaterre 1 247 media
#revuedepress presse 1 184 media
' ' 10 1470 media
... ... ... ...
concours soirée 1 425 non-media
et 〰 1 2311 non-media
les trips 1 151 non-media
nouvelle collection 1 181 non-media
trips boostes 1 151 non-media

4742547 rows × 3 columns

In [45]:
tableau2["bigramme"] = tableau2.index
tableau2.columns = ["nb","interactions","media","bigramme"]
tableau2.reset_index(drop=True, inplace=True)
tableau2
Out[45]:
nb interactions media bigramme
0 1 325 media #achatlocal bâton
1 1 334 media #cavabienaller joignez
2 1 247 media #jourdelaterrechezsoi jourdelaterre
3 1 184 media #revuedepress presse
4 10 1470 media ' '
... ... ... ... ...
4742542 1 425 non-media concours soirée
4742543 1 2311 non-media et 〰
4742544 1 151 non-media les trips
4742545 1 181 non-media nouvelle collection
4742546 1 151 non-media trips boostes

4742547 rows × 4 columns

In [46]:
khi2_2 = pan.pivot_table(tableau2,index=["bigramme"],columns=["media"],values=["bigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [47]:
khi2_2
Out[47]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
bigramme
#6 canada 0 1 0 1 0 838 0 1
#achatlocal bâton 1 0 1 0 325 0 1 0
#achatlocal timeline 0 1 0 1 0 1217 0 1
#arteta propos 0 1 0 1 0 120 0 1
#assnat covid19qc 0 1 0 1 0 145 0 1
... ... ... ... ... ... ... ... ...
concours soirée 0 1 0 1 0 425 0 1
et 〰 0 1 0 1 0 2311 0 1
les trips 0 1 0 1 0 151 0 1
nouvelle collection 0 1 0 1 0 181 0 1
trips boostes 0 1 0 1 0 151 0 1

4309102 rows × 8 columns

In [48]:
khi2_2["bigramme"] = khi2_2.index
khi2_2.columns = khi2_2.columns.get_level_values(0)
khi2_2.reset_index(drop=True, inplace=True)
khi2_2.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","bigramme"]
khi2_2 = khi2_2.drop(columns=["a","b","c","d"])
khi2_2
Out[48]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme
0 0 838 0 1 #6 canada
1 325 0 1 0 #achatlocal bâton
2 0 1217 0 1 #achatlocal timeline
3 0 120 0 1 #arteta propos
4 0 145 0 1 #assnat covid19qc
... ... ... ... ... ...
4309097 0 425 0 1 concours soirée
4309098 0 2311 0 1 et 〰
4309099 0 151 0 1 les trips
4309100 0 181 0 1 nouvelle collection
4309101 0 151 0 1 trips boostes

4309102 rows × 5 columns

In [49]:
khi2_2.obs_media.sum(), khi2_2.obs_nonmedia.sum()
Out[49]:
(3761088, 10126898)
In [50]:
print("Nb de lignes fichier media2 = ", media2.shape[0])
print("Somme observée média khi2_2 = ", khi2_2.obs_media.sum())

print("Nb de lignes  fichier nonmedia2 = ", nonmedia2.shape[0])
print("Somme observée nonmédia khi2_2 = ", khi2_2.obs_nonmedia.sum())
Nb de lignes fichier media2 =  3761088
Somme observée média khi2_2 =  3761088
Nb de lignes  fichier nonmedia2 =  10126898
Somme observée nonmédia khi2_2 =  10126898
In [51]:
khi2_2["exp_media"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_media.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2["exp_nonmedia"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_nonmedia.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2
Out[51]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia
0 0 838 0 1 #6 canada 0.270816 0.729184
1 325 0 1 0 #achatlocal bâton 0.270816 0.729184
2 0 1217 0 1 #achatlocal timeline 0.270816 0.729184
3 0 120 0 1 #arteta propos 0.270816 0.729184
4 0 145 0 1 #assnat covid19qc 0.270816 0.729184
... ... ... ... ... ... ... ...
4309097 0 425 0 1 concours soirée 0.270816 0.729184
4309098 0 2311 0 1 et 〰 0.270816 0.729184
4309099 0 151 0 1 les trips 0.270816 0.729184
4309100 0 181 0 1 nouvelle collection 0.270816 0.729184
4309101 0 151 0 1 trips boostes 0.270816 0.729184

4309102 rows × 7 columns

In [52]:
khi2_2["res_media"] = (khi2_2.obs_media - khi2_2.exp_media) / np.sqrt(khi2_2.exp_media)
khi2_2["res_nonmedia"] = (khi2_2.obs_nonmedia - khi2_2.exp_nonmedia) / np.sqrt(khi2_2.exp_nonmedia)
khi2_2
Out[52]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 838 0 1 #6 canada 0.270816 0.729184 -0.5204 0.317143
1 325 0 1 0 #achatlocal bâton 0.270816 0.729184 1.4012 -0.853923
2 0 1217 0 1 #achatlocal timeline 0.270816 0.729184 -0.5204 0.317143
3 0 120 0 1 #arteta propos 0.270816 0.729184 -0.5204 0.317143
4 0 145 0 1 #assnat covid19qc 0.270816 0.729184 -0.5204 0.317143
... ... ... ... ... ... ... ... ... ...
4309097 0 425 0 1 concours soirée 0.270816 0.729184 -0.5204 0.317143
4309098 0 2311 0 1 et 〰 0.270816 0.729184 -0.5204 0.317143
4309099 0 151 0 1 les trips 0.270816 0.729184 -0.5204 0.317143
4309100 0 181 0 1 nouvelle collection 0.270816 0.729184 -0.5204 0.317143
4309101 0 151 0 1 trips boostes 0.270816 0.729184 -0.5204 0.317143

4309102 rows × 9 columns

In [53]:
graph_media2 = khi2_2.sort_values(by=["res_media"],ascending=False).head(50)
graph_media2
Out[53]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
709807 2542611 449401 5101 1075 cas covid-19 1672.559253 4503.440747 83.831240 -51.088649
1225167 3775405 621290 5390 1504 donald trump 1867.005099 5026.994901 81.534096 -49.688718
2656307 2456127 593015 5262 1486 nouveau cas 1827.465971 4920.534029 80.342099 -48.962288
2785492 2519190 1208685 3510 1281 pandémie covid-19 1297.479174 3493.520826 61.423869 -37.433092
3523307 3708401 3279067 5657 4090 santé public 2639.642979 7107.357021 58.729203 -35.790901
1671498 4093936 3174715 5085 3988 françois legault 2457.113035 6615.886965 53.014432 -32.308191
2097752 1570168 353024 2020 470 joe biden 674.331694 1815.668306 51.820457 -31.580556
3786353 1558811 657166 2444 895 sûreté québec 904.254428 2434.745572 51.203971 -31.204855
3143405 957795 80448 1331 206 président américain 416.244102 1120.755898 44.836445 -27.324341
3003044 960627 168888 1483 366 positif covid-19 500.738675 1348.261325 43.895649 -26.750999
1046521 1365984 275905 1613 504 covid-19 québec 573.317348 1543.682652 43.421343 -26.461946
1762519 1484738 979190 2234 1211 gouvernement legault 932.960917 2512.039083 42.594999 -25.958354
1160221 502548 0 918 0 devoir opinion 248.609034 669.390966 42.454272 -25.872591
2661710 261760 6888 943 24 nouvelle union 261.879015 705.120985 42.089514 -25.650300
464577 260160 6314 936 23 avenir érable 259.712488 699.287512 41.964780 -25.574284
3981534 260160 6314 936 23 union avenir 259.712488 699.287512 41.964780 -25.574284
2656951 653379 66853 1072 129 nouveau décès 325.249945 875.750055 41.406321 -25.233946
2472415 1080760 409388 1564 555 ministre françois 573.858979 1545.141021 41.332765 -25.189120
138268 446345 172646 1243 333 abitibi témiscamingue 426.805923 1149.194077 39.507379 -24.076688
709728 868451 256976 1768 868 cas confirmer 713.870821 1922.129179 39.453362 -24.043769
2889868 531057 252832 1010 176 personne infecter 321.187706 864.812294 38.434517 -23.422862
2117533 225875 705 753 4 journal saint-françois 205.007667 551.992333 38.272729 -23.324265
2171438 621229 317980 1476 615 lac saint 566.276133 1524.723867 38.229222 -23.297750
1271052 873753 579930 1620 791 début pandémie 652.937234 1758.062766 37.845909 -23.064151
2322557 719285 119374 1015 200 maison blanche 329.041369 885.958631 37.815746 -23.045769
1043148 330951 4505 732 8 covid-19 atlantique 200.403796 539.596204 37.551639 -22.884816
4203473 467993 114531 902 150 éclosion covid-19 284.898370 767.101630 36.560465 -22.280772
75666 1031816 426635 1462 692 24 heure 583.337537 1570.662463 36.379951 -22.170763
290369 538026 35140 803 83 américain donald 239.942924 646.057076 36.349501 -22.152206
3487006 627335 358442 1488 727 saguenay lac 599.857310 1615.142690 36.262586 -22.099238
860856 226414 1733 681 9 cogeco nouveau 186.862999 503.137001 36.148120 -22.029480
3489253 617059 378845 1474 722 saint jean 594.711807 1601.288193 36.056035 -21.973361
2993584 2213774 1257346 2235 1763 port masque 1082.722133 2915.277867 35.018595 -21.341122
3645251 183991 2870 628 5 soleil châteauguay 171.426491 461.573509 34.871589 -21.251533
732658 868489 495829 1598 959 centre ville 692.476362 1864.523638 34.411002 -20.970841
4188744 1002643 470480 1408 740 zone rouge 581.712642 1566.287358 34.259172 -20.878312
3658528 288871 38221 701 75 sorel tracy 210.153170 565.846830 33.859303 -20.634623
2785466 657692 257806 1116 441 pandémie coronavirus 421.660420 1135.339580 33.813529 -20.606728
421227 819729 157012 975 317 atteindre covid-19 349.894196 942.105804 33.418362 -20.365904
822134 134163 354 563 1 ciho fm 152.740191 411.259809 33.195737 -20.230231
2656438 133604 354 560 1 nouveau ciho 151.927743 409.072257 33.106901 -20.176092
1619313 133604 354 560 1 fm 963 151.927743 409.072257 33.106901 -20.176092
4002199 709326 258999 954 341 vaccin covid-19 350.706644 944.293356 32.214882 -19.632476
2619890 986406 665187 1347 804 new york 582.525090 1568.474910 31.674219 -19.302984
1282929 435190 48518 658 103 déclarer positif 206.090931 554.909069 31.479050 -19.184043
3233155 624693 223658 949 371 raison pandémie 357.477042 962.522958 31.285819 -19.066284
4206498 563325 187587 883 306 école secondaire 322.000154 866.999846 31.263292 -19.052555
943899 343241 57122 672 120 confirmer covid-19 214.486225 577.513775 31.239529 -19.038074
2637596 609865 456434 1156 611 nombre cas 478.531768 1288.468232 30.969454 -18.873484
810723 381692 5278 486 5 chronique richard 132.970627 358.029373 30.614910 -18.657417
In [54]:
fig = px.bar(graph_media2, x="res_media", y="bigramme", labels={"res_media":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="navy", opacity=0.75, textfont_size=12)
fig
In [55]:
graph_nonmedia2 = khi2_2.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia2
Out[55]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
1343932 0 4587911 0 13853 détail intérieur 3751.613233 10101.386767 -61.250414 37.327384
2913509 364909 11936253 763 14127 photo from 4032.449365 10857.550635 -51.486119 31.376802
3858944 177218 10404308 280 10142 timeline photo 2822.443739 7599.556261 -47.856254 29.164681
2142652 5214 4617898 6 3868 jésus christ 1049.140956 2824.859044 -32.205206 19.626579
1100759 444 1974086 3 2597 côte ivoire 704.121447 1895.878553 -26.422230 16.102303
2703453 9539 1625068 32 2363 of the 648.604179 1746.395821 -24.211215 14.754861
1526184 24849 1621419 37 2388 facile faire 656.728657 1768.271343 -24.182912 14.737612
807916 0 1480876 0 2138 chris yapi 579.004482 1558.995518 -24.062512 14.664238
2635890 0 4710817 0 2077 nom jésus 562.484710 1514.515290 -23.716760 14.453529
240888 0 1220536 0 1892 alassane ouattara 512.383761 1379.616239 -22.635895 13.794825
4171082 0 1453954 0 1487 wwwuniversty energy 402.703305 1084.296695 -20.067469 12.229568
1396458 0 1397459 0 1435 energy energy 388.620876 1046.379124 -19.713469 12.013833
3037192 164550 2941413 217 2586 prendre soin 759.097083 2043.902917 -19.675619 11.990766
580340 15885 1581591 30 1602 bon journée 441.971616 1190.028384 -19.596121 11.942318
3561159 0 1560583 0 1407 seigneur jésus 381.038029 1025.961971 -19.520195 11.896048
3037352 158789 2283372 131 2001 prendre temps 577.379587 1554.620413 -18.576916 11.321192
1954954 86405 1265203 14 1330 in the 363.976625 980.023375 -18.344348 11.179460
3565804 20852 1034169 52 1460 semaine julie 409.473703 1102.526297 -17.665710 10.765882
707391 49174 1395237 103 1711 carte cadeau 491.260117 1322.739883 -17.517294 10.675434
1126503 0 704959 0 1050 del' amour 284.356738 765.643262 -16.862880 10.276620
2985889 41476 1255275 66 1401 pomme terre 397.286986 1069.713014 -16.620811 10.129097
3868388 5467 1215502 7 1053 to the 287.064898 772.935102 -16.529839 10.073657
2900484 7797 950897 17 1112 petit petit 305.751198 823.248802 -16.513522 10.063713
1828642 0 654655 0 961 hamed bakayoko 260.254119 700.745881 -16.132393 9.831445
579178 135317 2903777 245 2148 bon chance 648.062547 1744.937453 -15.833028 9.649005
651523 0 613702 0 925 c soupe 250.504746 674.495254 -15.827342 9.645539
2306290 7936 728716 16 1021 made in 280.836131 756.163869 -15.803408 9.630953
3742352 0 655207 0 907 super facile 245.630059 661.369941 -15.672589 9.551230
2899387 243 793826 1 906 petit gamin 245.630059 661.369941 -15.608784 9.512345
3198019 0 310778 0 893 question impossible 241.838635 651.161365 -15.551162 9.477229
1802697 0 540899 0 892 guillaume soro 241.567820 650.432180 -15.542452 9.471921
1636528 3929 2162300 7 908 for the 247.796586 667.203414 -15.296873 9.322260
2917034 25334 865560 47 1128 photos from 318.208731 856.791269 -15.203642 9.265442
605685 8978 687527 22 984 boutique ligne 272.440837 733.559163 -15.172916 9.246717
807779 0 520914 0 812 chri yapi 219.902544 592.097456 -14.829111 9.037195
1201402 30045 812133 36 1015 disponible ici 284.627554 766.372446 -14.737058 8.981096
2418232 6639 634128 26 956 meilleur recette 265.941254 716.058746 -14.713367 8.966658
651539 0 513750 0 795 c thé 215.298673 579.701327 -14.673059 8.942093
1714703 0 748113 0 777 gamin oboxadsq 210.423986 566.576014 -14.505998 8.840283
3169632 0 748113 0 777 pushcmd addbanner 210.423986 566.576014 -14.505998 8.840283
3842263 0 763077 0 775 thank you 209.882354 565.117646 -14.487317 8.828898
3209482 5780 973515 9 819 québec fier 224.235599 603.764401 -14.373476 8.759521
3122078 0 497056 0 762 préchauffer four 206.361747 555.638253 -14.365297 8.754536
1708934 15161 503753 25 909 gagner carte 252.942089 681.057911 -14.332237 8.734388
471878 51223 1535725 54 1046 avoir besoin 297.897535 802.102465 -14.131034 8.611771
576827 187 479320 1 743 bol mélanger 201.487060 542.512940 -14.124165 8.607585
3963594 797 903913 2 744 télé web 202.028692 543.971308 -14.072970 8.576386
2641886 0 896097 0 710 noovo télé 192.279318 517.720682 -13.866482 8.450547
1345172 256 610026 2 721 détails article 195.799925 527.200075 -13.849923 8.440455
3034827 8222 431143 12 774 prendre café 212.861330 573.138670 -13.767274 8.390087
In [56]:
fig = px.bar(graph_nonmedia2, x="res_nonmedia", y="bigramme", labels={"res_nonmedia":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="aqua", opacity=0.75, textfont_size=12)
fig
In [57]:
khi2_2["exp_inter_media"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_media.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2["exp_inter_nonmedia"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_nonmedia.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2
Out[57]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 838 0 1 #6 canada 0.270816 0.729184 -0.5204 0.317143 185.824189 652.175811
1 325 0 1 0 #achatlocal bâton 0.270816 0.729184 1.4012 -0.853923 72.067854 252.932146
2 0 1217 0 1 #achatlocal timeline 0.270816 0.729184 -0.5204 0.317143 269.866394 947.133606
3 0 120 0 1 #arteta propos 0.270816 0.729184 -0.5204 0.317143 26.609669 93.390331
4 0 145 0 1 #assnat covid19qc 0.270816 0.729184 -0.5204 0.317143 32.153350 112.846650
... ... ... ... ... ... ... ... ... ... ... ...
4309097 0 425 0 1 concours soirée 0.270816 0.729184 -0.5204 0.317143 94.242578 330.757422
4309098 0 2311 0 1 et 〰 0.270816 0.729184 -0.5204 0.317143 512.457876 1798.542124
4309099 0 151 0 1 les trips 0.270816 0.729184 -0.5204 0.317143 33.483834 117.516166
4309100 0 181 0 1 nouvelle collection 0.270816 0.729184 -0.5204 0.317143 40.136251 140.863749
4309101 0 151 0 1 trips boostes 0.270816 0.729184 -0.5204 0.317143 33.483834 117.516166

4309102 rows × 11 columns

In [58]:
khi2_2["res_inter_media"] = (khi2_2.interactions_media - khi2_2.exp_inter_media) / np.sqrt(khi2_2.exp_inter_media)
khi2_2["res_inter_nonmedia"] = (khi2_2.interactions_nonmedia - khi2_2.exp_inter_nonmedia) / np.sqrt(khi2_2.exp_inter_nonmedia)
khi2_2
Out[58]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 838 0 1 #6 canada 0.270816 0.729184 -0.5204 0.317143 185.824189 652.175811 -13.631735 7.276456
1 325 0 1 0 #achatlocal bâton 0.270816 0.729184 1.4012 -0.853923 72.067854 252.932146 29.794303 -15.903841
2 0 1217 0 1 #achatlocal timeline 0.270816 0.729184 -0.5204 0.317143 269.866394 947.133606 -16.427611 8.768861
3 0 120 0 1 #arteta propos 0.270816 0.729184 -0.5204 0.317143 26.609669 93.390331 -5.158456 2.753522
4 0 145 0 1 #assnat covid19qc 0.270816 0.729184 -0.5204 0.317143 32.153350 112.846650 -5.670392 3.026787
... ... ... ... ... ... ... ... ... ... ... ... ... ...
4309097 0 425 0 1 concours soirée 0.270816 0.729184 -0.5204 0.317143 94.242578 330.757422 -9.707862 5.181940
4309098 0 2311 0 1 et 〰 0.270816 0.729184 -0.5204 0.317143 512.457876 1798.542124 -22.637532 12.083642
4309099 0 151 0 1 les trips 0.270816 0.729184 -0.5204 0.317143 33.483834 117.516166 -5.786522 3.088776
4309100 0 181 0 1 nouvelle collection 0.270816 0.729184 -0.5204 0.317143 40.136251 140.863749 -6.335318 3.381716
4309101 0 151 0 1 trips boostes 0.270816 0.729184 -0.5204 0.317143 33.483834 117.516166 -5.786522 3.088776

4309102 rows × 13 columns

In [59]:
graph_interactions_media2 = khi2_2.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media2
Out[59]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
1225167 3775405 621290 5390 1504 donald trump 1867.005099 5026.994901 81.534096 -49.688718 9.749550e+05 3.421740e+06 2836.191435 -1513.924860
709807 2542611 449401 5101 1075 cas covid-19 1672.559253 4503.440747 83.831240 -51.088649 6.634704e+05 2.328542e+06 2307.004782 -1231.451392
1142860 2877955 874242 1825 1584 dernier heur 923.211544 2485.788456 29.679328 -18.087252 8.320393e+05 2.920158e+06 2242.930403 -1197.249260
2656307 2456127 593015 5262 1486 nouveau cas 1827.465971 4920.534029 80.342099 -48.962288 6.761388e+05 2.373003e+06 2164.707279 -1155.494698
1671498 4093936 3174715 5085 3988 françois legault 2457.113035 6615.886965 53.014432 -32.308191 1.611803e+06 5.656848e+06 1955.099982 -1043.608845
2785492 2519190 1208685 3510 1281 pandémie covid-19 1297.479174 3493.520826 61.423869 -37.433092 8.266460e+05 2.901229e+06 1861.573480 -993.685523
2097752 1570168 353024 2020 470 joe biden 674.331694 1815.668306 51.820457 -31.580556 4.264625e+05 1.496729e+06 1751.353303 -934.851319
3523307 3708401 3279067 5657 4090 santé public 2639.642979 7107.357021 58.729203 -35.790901 1.549452e+06 5.438016e+06 1734.416524 -925.810670
1046521 1365984 275905 1613 504 covid-19 québec 573.317348 1543.682652 43.421343 -26.461946 3.640844e+05 1.277805e+06 1660.440093 -886.322942
1739046 1818746 762801 1002 856 george floyd 503.176019 1354.823981 22.237571 -13.552078 5.724509e+05 2.009096e+06 1647.220529 -879.266497
2993584 2213774 1257346 2235 1763 port masque 1082.722133 2915.277867 35.018595 -21.341122 7.697113e+05 2.701409e+06 1645.970801 -878.599408
347370 1013134 21184 40 19 appel calme 15.978141 43.021859 6.009571 -3.662368 2.293572e+05 8.049608e+05 1636.576343 -873.584759
1617797 1006708 17313 8 3 floyd lancer 2.978975 8.021025 2.909102 -1.772872 2.270738e+05 7.969472e+05 1636.090484 -873.325413
2183853 1244396 303894 417 267 lancer appel 185.238104 498.761896 17.028519 -10.377564 3.433290e+05 1.204961e+06 1537.806877 -820.862806
3786353 1558811 657166 2444 895 sûreté québec 904.254428 2434.745572 51.203971 -31.204855 4.913868e+05 1.724590e+06 1522.738442 -812.819457
3143405 957795 80448 1331 206 président américain 416.244102 1120.755898 44.836445 -27.324341 2.302275e+05 8.080155e+05 1516.333252 -809.400444
3003044 960627 168888 1483 366 positif covid-19 500.738675 1348.261325 43.895649 -26.750999 2.504668e+05 8.790482e+05 1418.996068 -757.443026
2472415 1080760 409388 1564 555 ministre françois 573.858979 1545.141021 41.332765 -25.189120 3.304362e+05 1.159712e+06 1305.283660 -696.744711
2913947 684837 47003 393 55 photo jour 121.325542 326.674458 24.664521 -15.031116 1.622835e+05 5.695565e+05 1297.160079 -692.408441
421227 819729 157012 975 317 atteindre covid-19 349.894196 942.105804 33.418362 -20.365904 2.165896e+05 7.601514e+05 1295.981674 -691.779423
1762519 1484738 979190 2234 1211 gouvernement legault 932.960917 2512.039083 42.594999 -25.958354 5.463692e+05 1.917559e+06 1269.493324 -677.640261
2388254 1403925 869602 1242 1106 masque obligatoire 635.875830 1712.124170 24.036733 -14.648528 5.041483e+05 1.769379e+06 1267.230307 -676.432289
75666 1031816 426635 1462 692 24 heure 583.337537 1570.662463 36.379951 -22.170763 3.234075e+05 1.135044e+06 1245.686439 -664.932432
1878599 1524001 1088147 1382 1126 horacio arruda 679.206381 1828.793619 26.966646 -16.434082 5.792366e+05 2.032911e+06 1241.353605 -662.619617
709728 868451 256976 1768 868 cas confirmer 713.870821 1922.129179 39.453362 -24.043769 2.495603e+05 8.758667e+05 1238.871193 -661.294535
2322557 719285 119374 1015 200 maison blanche 329.041369 885.958631 37.815746 -23.045769 1.859703e+05 6.526887e+05 1236.692476 -660.131562
2656951 653379 66853 1072 129 nouveau décès 325.249945 875.750055 41.406321 -25.233946 1.597095e+05 5.605225e+05 1235.295933 -659.386105
4188744 1002643 470480 1408 740 zone rouge 581.712642 1566.287358 34.259172 -20.878312 3.266610e+05 1.146462e+06 1182.732517 -631.328386
3834324 509656 0 6 0 terrence floyd 1.624896 4.375104 3.432224 -2.091675 1.130148e+05 3.966412e+05 1179.859043 -629.794560
1160221 502548 0 918 0 devoir opinion 248.609034 669.390966 42.454272 -25.872591 1.114386e+05 3.911094e+05 1171.602607 -625.387374
1686793 500243 0 3 0 frère tuer 0.812448 2.187552 2.426949 -1.479038 1.109275e+05 3.893155e+05 1168.912667 -623.951516
3278145 499326 0 2 0 recueillir endroit 0.541632 1.458368 1.981596 -1.207629 1.107242e+05 3.886018e+05 1167.840803 -623.379368
666520 499326 0 2 0 calme vote 0.541632 1.458368 1.981596 -1.207629 1.107242e+05 3.886018e+05 1167.840803 -623.379368
3953452 499326 0 2 0 tuer terrence 0.541632 1.458368 1.981596 -1.207629 1.107242e+05 3.886018e+05 1167.840803 -623.379368
4115755 499326 388 2 2 vote frère 1.083264 2.916736 0.880800 -0.536779 1.108102e+05 3.889038e+05 1167.128870 -622.999347
1394702 499898 1138 4 5 endroit frère 2.437343 6.562657 1.000933 -0.609991 1.111034e+05 3.899326e+05 1166.424678 -622.623458
290369 538026 35140 803 83 américain donald 239.942924 646.057076 36.349501 -22.152206 1.270980e+05 4.460680e+05 1152.647286 -615.269252
2232911 918040 424298 893 486 lieu public 373.455183 1005.544817 26.884612 -16.384089 2.976597e+05 1.044678e+06 1137.098032 -606.969247
1686138 516124 41316 18 6 frère george 6.499583 17.500417 4.510980 -2.749093 1.236108e+05 4.338292e+05 1116.416143 -595.929503
2140789 2472664 3112733 2936 3504 justin trudeau 1744.054661 4695.945339 28.541467 -17.393814 1.238546e+06 4.346851e+06 1108.920509 -591.928424
2230443 597670 128687 816 322 lier covid-19 308.188541 829.811459 28.926391 -17.628395 1.610677e+05 5.652893e+05 1087.882225 -580.698441
4242627 863851 428912 306 454 émission spécial 205.820115 554.179885 6.982913 -4.255545 2.866666e+05 1.006096e+06 1078.017948 -575.433009
4002199 709326 258999 954 341 vaccin covid-19 350.706644 944.293356 32.214882 -19.632476 2.147234e+05 7.536016e+05 1067.374224 -569.751517
732658 868489 495829 1598 959 centre ville 692.476362 1864.523638 34.411002 -20.970841 3.025338e+05 1.061784e+06 1028.952133 -549.242267
2619890 986406 665187 1347 804 new york 582.525090 1568.474910 31.674219 -19.302984 3.662362e+05 1.285357e+06 1024.778461 -547.014411
244529 628854 212866 266 213 alerte amber 129.720836 349.279164 11.965328 -7.291941 1.866491e+05 6.550709e+05 1023.553682 -546.360639
3742095 614575 198634 384 282 super bowl 180.363417 485.636583 15.162876 -9.240599 1.803269e+05 6.328821e+05 1022.604671 -545.854068
1264961 696425 297706 490 330 duvernay tardif 222.069072 597.930928 17.979550 -10.957143 2.204458e+05 7.736852e+05 1013.764423 -541.135250
810723 381692 5278 486 5 chronique richard 132.970627 358.029373 30.614910 -18.657417 8.580953e+04 3.011605e+05 1010.070001 -539.163212
2785466 657692 257806 1116 441 pandémie coronavirus 421.660420 1135.339580 33.813529 -20.606728 2.030092e+05 7.124888e+05 1009.138450 -538.665961
In [60]:
fig = px.bar(graph_interactions_media2, x="res_inter_media", y="bigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [61]:
graph_interactions_nonmedia2 = khi2_2.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia2
Out[61]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
2913509 364909 11936253 763 14127 photo from 4032.449365 10857.550635 -51.486119 31.376802 2.727749e+06 9.573413e+06 -1430.645672 763.661445
3858944 177218 10404308 280 10142 timeline photo 2822.443739 7599.556261 -47.856254 29.164681 2.346424e+06 8.235102e+06 -1416.111903 755.903494
2635890 0 4710817 0 2077 nom jésus 562.484710 1514.515290 -23.716760 14.453529 1.044611e+06 3.666206e+06 -1022.061973 545.564383
1343932 0 4587911 0 13853 détail intérieur 3751.613233 10101.386767 -61.250414 37.327384 1.017357e+06 3.570554e+06 -1008.640972 538.400414
2142652 5214 4617898 6 3868 jésus christ 1049.140956 2824.859044 -32.205206 19.626579 1.025162e+06 3.597950e+06 -1007.353391 537.713119
4153162 160 4163683 1 496 we re 134.595523 362.404477 -11.515336 7.017705 9.233207e+05 3.240522e+06 -960.729270 512.825724
876347 0 2254948 0 688 com timeline 186.321368 501.678632 -13.649959 8.318593 5.000285e+05 1.754919e+06 -707.126933 377.455848
1636528 3929 2162300 7 908 for the 247.796586 667.203414 -15.296873 9.322260 4.803553e+05 1.685874e+06 -687.407771 366.929996
299889 0 2098790 0 189 an update 51.184213 137.815787 -7.154314 4.360001 4.654009e+05 1.633389e+06 -682.202971 364.151737
1967278 460 2019839 2 52 incroyable talent 14.624061 39.375939 -3.301149 2.011794 4.479957e+05 1.572303e+06 -668.637573 356.910691
2852077 11901 2079671 23 688 paul mukendi 192.550134 518.449866 -12.218734 7.446372 4.638003e+05 1.627772e+06 -663.553841 354.197055
1100759 444 1974086 3 2597 côte ivoire 704.121447 1895.878553 -26.422230 16.102303 4.378466e+05 1.536683e+06 -661.028768 352.849202
3954325 0 1945901 0 212 tune in 57.412980 154.587020 -7.577135 4.617677 4.314982e+05 1.514403e+06 -656.885211 350.637421
579178 135317 2903777 245 2148 bon chance 648.062547 1744.937453 -15.833028 9.649005 6.739107e+05 2.365183e+06 -656.084986 350.210271
3037192 164550 2941413 217 2586 prendre soin 759.097083 2043.902917 -19.675619 11.990766 6.887387e+05 2.417224e+06 -631.626622 337.154690
3562400 396 1720846 2 490 sel poivre 133.241443 358.758557 -11.369761 6.928989 3.816807e+05 1.339561e+06 -617.162116 329.433711
1906071 0 1681779 0 131 i giving 35.476888 95.523112 -5.956248 3.629872 3.729299e+05 1.308849e+06 -610.679830 325.973545
3561159 0 1560583 0 1407 seigneur jésus 381.038029 1025.961971 -19.520195 11.896048 3.460550e+05 1.214528e+06 -588.264376 314.008445
2703453 9539 1625068 32 2363 of the 648.604179 1746.395821 -24.211215 14.754861 3.624696e+05 1.272137e+06 -586.210397 312.912056
748102 178157 2735884 322 2057 chance gagner 644.271124 1734.728876 -12.696589 7.737588 6.461806e+05 2.267860e+06 -582.224891 310.784641
4151635 0 1520146 0 130 watch live 35.206072 94.793928 -5.933471 3.615991 3.370882e+05 1.183058e+06 -580.592958 309.913535
1747375 0 1518556 0 126 giving an 34.122809 91.877191 -5.841473 3.559926 3.367356e+05 1.181820e+06 -580.289242 309.751415
807916 0 1480876 0 2138 chris yapi 579.004482 1558.995518 -24.062512 14.664238 3.283802e+05 1.152496e+06 -573.044648 305.884338
580340 15885 1581591 30 1602 bon journée 441.971616 1190.028384 -19.596121 11.942318 3.542359e+05 1.243240e+06 -568.487668 303.451876
4171082 0 1453954 0 1487 wwwuniversty energy 402.703305 1084.296695 -20.067469 12.229568 3.224103e+05 1.131544e+06 -567.811843 303.091130
1526184 24849 1621419 37 2388 facile faire 656.728657 1768.271343 -24.182912 14.737612 3.650554e+05 1.281213e+06 -563.070900 300.560471
1396458 0 1397459 0 1435 energy energy 388.620876 1046.379124 -19.713469 12.013833 3.098827e+05 1.087576e+06 -556.671069 297.144318
3988016 0 1384184 0 130 update the 35.206072 94.793928 -5.933471 3.615991 3.069390e+05 1.077245e+06 -554.020744 295.729606
307365 4883 1397037 10 643 and the 176.842810 476.157190 -12.546246 7.645965 3.108719e+05 1.091048e+06 -548.801050 292.943397
3288684 20581 1509731 14 144 regarder direct 42.788919 115.211081 -4.401084 2.682120 3.393425e+05 1.190970e+06 -547.200782 292.089193
1954477 0 1326669 0 112 in now 30.331385 81.668615 -5.507394 3.356330 2.941852e+05 1.032484e+06 -542.388414 289.520409
1636177 420 1314231 2 361 for more 98.306187 264.693813 -9.713232 5.919463 2.915202e+05 1.023131e+06 -539.148253 287.790850
240888 0 1220536 0 1892 alassane ouattara 512.383761 1379.616239 -22.635895 13.794825 2.706505e+05 9.498855e+05 -520.240802 277.698280
3037352 158789 2283372 131 2001 prendre temps 577.379587 1554.620413 -18.576916 11.321192 5.415425e+05 1.900619e+06 -520.119185 277.633362
2664039 0 1205484 0 105 now for 28.435674 76.564326 -5.332511 3.249753 2.673128e+05 9.381712e+05 -517.022971 275.980640
3475684 1471 1204625 3 438 révérend paul 119.429830 321.570170 -10.653881 6.492716 2.674485e+05 9.386475e+05 -514.309782 274.532373
3258729 0 1188669 0 116 re doing 31.414649 84.585351 -5.604877 3.415739 2.635841e+05 9.250849e+05 -513.404395 274.049088
3040833 22857 1360842 25 186 presse covid-19 57.142164 153.857836 -4.252034 2.591286 3.068314e+05 1.076868e+06 -512.659867 273.651668
333299 0 1176752 0 466 anti média 126.200229 339.799771 -11.233888 6.846185 2.609415e+05 9.158105e+05 -510.824344 272.671888
3868388 5467 1215502 7 1053 to the 287.064898 772.935102 -16.529839 10.073657 2.707465e+05 9.502225e+05 -509.826343 272.139167
471878 51223 1535725 54 1046 avoir besoin 297.897535 802.102465 -14.131034 8.611771 3.519013e+05 1.235047e+06 -506.864279 270.558053
3650433 0 1150033 0 371 solution com 100.472714 270.527286 -10.023608 6.108613 2.550166e+05 8.950164e+05 -504.991729 269.558508
1886917 870 1131464 5 548 huile olive 149.761215 403.238785 -11.829123 7.208934 2.510919e+05 8.812421e+05 -499.354539 266.549444
3845293 0 1119089 0 118 the latest 31.956281 86.043719 -5.652989 3.445059 2.481549e+05 8.709341e+05 -498.151482 265.907267
2764195 0 1096626 0 137 our economy 37.101784 99.898216 -6.091123 3.712068 2.431738e+05 8.534522e+05 -493.126547 263.225017
2142482 0 1047994 0 469 jésus amen 127.012676 341.987324 -11.269990 6.868186 2.323898e+05 8.156042e+05 -482.068231 257.322221
169824 0 1042134 0 251 across the 67.974801 183.025199 -8.244683 5.024496 2.310903e+05 8.110437e+05 -480.718567 256.601786
3844166 0 1040443 0 232 the country 62.829298 169.170702 -7.926493 4.830584 2.307154e+05 8.097276e+05 -480.328394 256.393517
707391 49174 1395237 103 1711 carte cadeau 491.260117 1322.739883 -17.517294 10.675434 3.202942e+05 1.124117e+06 -479.057119 255.714925
3040883 48642 1358376 65 428 presse direct 133.512259 359.487741 -5.929359 3.613485 3.120024e+05 1.095016e+06 -471.488895 251.675099
In [62]:
fig = px.bar(graph_interactions_nonmedia2, x="res_inter_nonmedia", y="bigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="cyan", opacity=0.75, textfont_size=12)
fig

trigrammes

In [63]:
media3 = pan.read_csv("canada-trigrammes-media-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
nonmedia3 = pan.read_csv("canada-trigrammes-nonmedia-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
In [64]:
media3
Out[64]:
trigramme interactions
0 recueillir endroit frère 249663
1 endroit frère tuer 249663
2 frère tuer terrence 249663
3 tuer terrence floyd 249663
4 terrence floyd lancer 249663
... ... ...
3617284 polpa mutti snapie 363
3617285 mutti snapie risotto 363
3617286 snapie risotto tomate 363
3617287 maskinongé demeure zone 145
3617288 demeure zone orange 145

3617289 rows × 2 columns

In [65]:
nonmedia3
Out[65]:
trigramme interactions
0 guinéen incroyable talent 197962
1 incroyable talent guinéen 197962
2 talent guinéen incroyable 197962
3 guinéen incroyable talent 197962
4 ami animal rappeler 189308
... ... ...
9799338 cię ale przede 149
9799339 ale przede wszystkim 149
9799340 przede wszystkim by 149
9799341 wszystkim by cię 149
9799342 by cię kochać 149

9799343 rows × 2 columns

In [66]:
media3_table = pan.pivot_table(media3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
nonmedia3_table = pan.pivot_table(nonmedia3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
In [67]:
media3_table
Out[67]:
len sum
interactions interactions
trigramme
#achatlocal bâton roue 1 325
#cavabienaller joignez vous 1 334
#revuedepress presse canadien 1 184
' ' nouveau 1 206
' avantage gaspésien 1 147
... ... ...
瑞金日 fedex g 1 236
蜜ய this table 1 254
黑瑞金包装 gpackaging ரය 1 390
프블 radio gaspesie 1 666
화대 cinéma film 1 279

2041002 rows × 2 columns

In [68]:
nonmedia3_table
Out[68]:
len sum
interactions interactions
trigramme
#6 canada prouver 1 838
#achatlocal timeline photo 1 1217
#arteta propos sortie 1 120
#assnat covid19qc aîné 1 145
#aujourdhui souvenir victime 2 430
... ... ...
concours soirée anniversaire 1 425
et 〰 500 1 2311
les trips boostes 1 151
nouvelle collection découvrir 1 181
trips boostes dernière 1 151

5989885 rows × 2 columns

In [69]:
media3_table["type"] = "media"
nonmedia3_table["type"] = "non-media"
In [70]:
media3_table
Out[70]:
len sum type
interactions interactions
trigramme
#achatlocal bâton roue 1 325 media
#cavabienaller joignez vous 1 334 media
#revuedepress presse canadien 1 184 media
' ' nouveau 1 206 media
' avantage gaspésien 1 147 media
... ... ... ...
瑞金日 fedex g 1 236 media
蜜ய this table 1 254 media
黑瑞金包装 gpackaging ரය 1 390 media
프블 radio gaspesie 1 666 media
화대 cinéma film 1 279 media

2041002 rows × 3 columns

In [71]:
nonmedia3_table
Out[71]:
len sum type
interactions interactions
trigramme
#6 canada prouver 1 838 non-media
#achatlocal timeline photo 1 1217 non-media
#arteta propos sortie 1 120 non-media
#assnat covid19qc aîné 1 145 non-media
#aujourdhui souvenir victime 2 430 non-media
... ... ... ...
concours soirée anniversaire 1 425 non-media
et 〰 500 1 2311 non-media
les trips boostes 1 151 non-media
nouvelle collection découvrir 1 181 non-media
trips boostes dernière 1 151 non-media

5989885 rows × 3 columns

In [72]:
tableau3 = pan.concat([media3_table,nonmedia3_table], names = ["trigramme","nb","interactions","type"], levels=0)
In [73]:
tableau3
Out[73]:
len sum type
interactions interactions
trigramme
#achatlocal bâton roue 1 325 media
#cavabienaller joignez vous 1 334 media
#revuedepress presse canadien 1 184 media
' ' nouveau 1 206 media
' avantage gaspésien 1 147 media
... ... ... ...
concours soirée anniversaire 1 425 non-media
et 〰 500 1 2311 non-media
les trips boostes 1 151 non-media
nouvelle collection découvrir 1 181 non-media
trips boostes dernière 1 151 non-media

8030887 rows × 3 columns

In [74]:
tableau3["trigramme"] = tableau3.index
tableau3.columns = ["nb","interactions","media","trigramme"]
tableau3.reset_index(drop=True, inplace=True)
tableau3
Out[74]:
nb interactions media trigramme
0 1 325 media #achatlocal bâton roue
1 1 334 media #cavabienaller joignez vous
2 1 184 media #revuedepress presse canadien
3 1 206 media ' ' nouveau
4 1 147 media ' avantage gaspésien
... ... ... ... ...
8030882 1 425 non-media concours soirée anniversaire
8030883 1 2311 non-media et 〰 500
8030884 1 151 non-media les trips boostes
8030885 1 181 non-media nouvelle collection découvrir
8030886 1 151 non-media trips boostes dernière

8030887 rows × 4 columns

In [75]:
khi2_3 = pan.pivot_table(tableau3,index=["trigramme"],columns=["media"],values=["trigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [76]:
khi2_3
Out[76]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
trigramme
#6 canada prouver 0 1 0 1 0 838 0 1
#achatlocal bâton roue 1 0 1 0 325 0 1 0
#achatlocal timeline photo 0 1 0 1 0 1217 0 1
#arteta propos sortie 0 1 0 1 0 120 0 1
#assnat covid19qc aîné 0 1 0 1 0 145 0 1
... ... ... ... ... ... ... ... ...
concours soirée anniversaire 0 1 0 1 0 425 0 1
et 〰 500 0 1 0 1 0 2311 0 1
les trips boostes 0 1 0 1 0 151 0 1
nouvelle collection découvrir 0 1 0 1 0 181 0 1
trips boostes dernière 0 1 0 1 0 151 0 1

7811363 rows × 8 columns

In [77]:
khi2_3["trigramme"] = khi2_3.index
khi2_3.columns = khi2_3.columns.get_level_values(0)
khi2_3.reset_index(drop=True, inplace=True)
khi2_3.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","trigramme"]
khi2_3 = khi2_3.drop(columns=["a","b","c","d"])
khi2_3
Out[77]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme
0 0 838 0 1 #6 canada prouver
1 325 0 1 0 #achatlocal bâton roue
2 0 1217 0 1 #achatlocal timeline photo
3 0 120 0 1 #arteta propos sortie
4 0 145 0 1 #assnat covid19qc aîné
... ... ... ... ... ...
7811358 0 425 0 1 concours soirée anniversaire
7811359 0 2311 0 1 et 〰 500
7811360 0 151 0 1 les trips boostes
7811361 0 181 0 1 nouvelle collection découvrir
7811362 0 151 0 1 trips boostes dernière

7811363 rows × 5 columns

In [78]:
khi2_3.obs_media.sum(), khi2_3.obs_nonmedia.sum()
Out[78]:
(3617289, 9799343)
In [79]:
print("Nb de lignes fichier media3 = ", media3.shape[0])
print("Somme observée média khi2_3 = ", khi2_3.obs_media.sum())

print("Nb de lignes  fichier nonmedia3 = ", nonmedia3.shape[0])
print("Somme observée nonmédia khi2_3 = ", khi2_3.obs_nonmedia.sum())
Nb de lignes fichier media3 =  3617289
Somme observée média khi2_3 =  3617289
Nb de lignes  fichier nonmedia3 =  9799343
Somme observée nonmédia khi2_3 =  9799343
In [80]:
khi2_3["exp_media"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_media.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3["exp_nonmedia"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_nonmedia.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3
Out[80]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia
0 0 838 0 1 #6 canada prouver 0.269612 0.730388
1 325 0 1 0 #achatlocal bâton roue 0.269612 0.730388
2 0 1217 0 1 #achatlocal timeline photo 0.269612 0.730388
3 0 120 0 1 #arteta propos sortie 0.269612 0.730388
4 0 145 0 1 #assnat covid19qc aîné 0.269612 0.730388
... ... ... ... ... ... ... ...
7811358 0 425 0 1 concours soirée anniversaire 0.269612 0.730388
7811359 0 2311 0 1 et 〰 500 0.269612 0.730388
7811360 0 151 0 1 les trips boostes 0.269612 0.730388
7811361 0 181 0 1 nouvelle collection découvrir 0.269612 0.730388
7811362 0 151 0 1 trips boostes dernière 0.269612 0.730388

7811363 rows × 7 columns

In [81]:
khi2_3["res_media"] = (khi2_3.obs_media - khi2_3.exp_media) / np.sqrt(khi2_3.exp_media)
khi2_3["res_nonmedia"] = (khi2_3.obs_nonmedia - khi2_3.exp_nonmedia) / np.sqrt(khi2_3.exp_nonmedia)
khi2_3
Out[81]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 838 0 1 #6 canada prouver 0.269612 0.730388 -0.519242 0.315474
1 325 0 1 0 #achatlocal bâton roue 0.269612 0.730388 1.406642 -0.854627
2 0 1217 0 1 #achatlocal timeline photo 0.269612 0.730388 -0.519242 0.315474
3 0 120 0 1 #arteta propos sortie 0.269612 0.730388 -0.519242 0.315474
4 0 145 0 1 #assnat covid19qc aîné 0.269612 0.730388 -0.519242 0.315474
... ... ... ... ... ... ... ... ... ...
7811358 0 425 0 1 concours soirée anniversaire 0.269612 0.730388 -0.519242 0.315474
7811359 0 2311 0 1 et 〰 500 0.269612 0.730388 -0.519242 0.315474
7811360 0 151 0 1 les trips boostes 0.269612 0.730388 -0.519242 0.315474
7811361 0 181 0 1 nouvelle collection découvrir 0.269612 0.730388 -0.519242 0.315474
7811362 0 151 0 1 trips boostes dernière 0.269612 0.730388 -0.519242 0.315474

7811363 rows × 9 columns

In [82]:
graph_media3 = khi2_3.sort_values(by=["res_media"],ascending=False).head(50)
graph_media3
Out[82]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
4747575 762701 142711 1711 354 nouveau cas covid-19 556.749398 1508.250602 48.918173 -29.720996
4768506 260160 6314 936 23 nouvelle union avenir 258.558195 700.441805 42.130134 -25.596817
7184563 260160 6314 936 23 union avenir érable 258.558195 700.441805 42.130134 -25.596817
4422778 1074338 404734 1543 544 ministre françois legault 562.680868 1524.319132 41.327240 -25.109006
6306395 616246 292096 1458 579 saguenay lac saint 549.200253 1487.799747 38.779556 -23.561121
3878545 612612 313349 1464 607 lac saint jean 558.367071 1512.632929 38.325895 -23.285493
520249 532595 33884 789 79 américain donald trump 234.023476 633.976524 36.278131 -22.041342
5683214 505967 31699 734 77 président américain donald 218.655575 592.344425 34.851165 -21.174367
1468932 133604 354 560 1 ciho fm 963 151.252500 409.747500 33.235623 -20.192819
4748513 133604 354 560 1 nouveau ciho fm 151.252500 409.747500 33.235623 -20.192819
1265346 336551 46578 657 116 cas confirmer covid-19 208.410307 564.589693 31.073466 -18.879167
1450323 364716 5278 446 5 chronique richard martineau 121.595147 329.404853 29.419089 -17.874024
6229998 139480 1618 444 4 région sorel tracy 120.786310 327.213690 29.409043 -17.867921
3300789 133008 1309 431 3 hebdomadaire région sorel 117.011738 316.988262 29.026769 -17.635664
7159072 138693 0 398 0 téléjournal 18 édition 107.305695 290.694305 28.062421 -17.049760
5684355 289570 16027 435 42 président donald trump 128.605067 348.394933 27.017959 -16.415181
2034682 269925 15519 421 34 dernier 24 heure 122.673596 332.326404 26.934948 -16.364746
4664088 665165 300397 804 397 national santé public 323.804371 877.195629 26.685592 -16.213246
144312 162241 90934 448 69 24 dernier heure 139.389559 377.610441 26.139393 -15.881394
2143409 596692 194146 611 217 directeur national santé 223.238984 604.761016 25.952496 -15.767842
2306169 296252 28397 419 55 déclarer positif covid-19 127.796230 346.203770 25.759532 -15.650604
4423520 522251 276344 678 307 ministre justin trudeau 265.568115 719.431885 25.308375 -15.376496
5814523 640012 262507 597 228 québec françois legault 222.430147 602.569853 25.115163 -15.259107
4747558 179053 14861 392 54 nouveau cas confirmer 120.247085 325.752915 24.782033 -15.056709
4424969 463512 207419 501 146 ministre québec françois 174.439158 472.560842 24.725331 -15.022259
4747602 200471 10134 370 44 nouveau cas décès 111.619492 302.380508 24.456242 -14.858770
2364701 229711 7919 324 19 démocrate joe biden 92.477019 250.522981 24.075604 -14.627507
3300611 79647 354 280 2 hebdo st maurice 76.030668 205.969332 23.392168 -14.212275
5502805 505800 339271 547 231 prestation canadien urgence 209.758369 568.241631 23.285284 -14.147336
4983282 353843 15397 307 21 pandémie covid-19 québec 88.432834 239.567166 23.242249 -14.121189
2241329 713152 354377 698 436 dr horacio arruda 305.740347 828.259653 22.433512 -13.629828
7244178 80219 943 264 6 vaudreuil soulanges néomedia 72.795321 197.204679 22.410254 -13.615698
2145629 207643 136485 478 185 direction santé public 178.752954 484.247046 22.382225 -13.598668
6838704 163583 27636 310 37 sûreté québec sq 93.555468 253.444532 22.377524 -13.595812
1266468 118779 10920 281 26 cas covid-19 confirmer 82.770976 224.229024 21.788544 -13.237968
5346178 297029 16771 246 8 point pandémie covid-19 68.481524 185.518476 21.451460 -13.033168
317171 146892 0 227 0 actualité jour devoir 61.201992 165.798008 21.193200 -12.876258
3763865 146892 0 227 0 jour devoir opinion 61.201992 165.798008 21.193200 -12.876258
7159079 149236 0 222 0 téléjournal 22 édition 59.853930 162.146070 20.958495 -12.733659
6370412 331046 109797 401 146 santé public canada 147.477928 399.522072 20.876239 -12.683683
6109454 147614 0 219 0 revoir téléjournal 22 59.045094 159.954906 20.816402 -12.647328
1310235 118304 40040 368 119 centre service scolaire 131.301190 355.698810 20.656738 -12.550322
3616516 85236 13106 271 39 intégrer santé service 83.579813 226.420187 20.500557 -12.455432
3980204 538049 170950 418 175 lieu public fermer 159.880093 433.119907 20.413821 -12.402734
1832641 150641 8649 228 11 cours dernier 24 64.437339 174.562661 20.375833 -12.379654
4714624 182190 56690 305 69 nombre cas covid-19 100.835000 273.165000 20.331791 -12.352895
4716677 165643 109500 368 128 nombre nouveau cas 133.727700 362.272300 20.258638 -12.308450
2326323 161713 29619 279 52 décès lier covid-19 89.241671 241.758329 20.087089 -12.204223
1486374 95403 11206 247 28 ciuss estrie chus 74.143382 200.856618 20.074735 -12.196717
6372875 332248 422052 832 749 santé service social 426.257045 1154.742955 19.652382 -11.940110
In [83]:
fig = px.bar(graph_media3, x="res_media", y="trigramme", labels={"res_media":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="DarkOliveGreen", opacity=0.75, textfont_size=12)
fig
In [84]:
graph_nonmedia3 = khi2_3.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia3
Out[84]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
7568817 0 1397459 0 1435 wwwuniversty energy energy 386.893649 1048.106351 -19.669612 11.950578
4711113 0 1798241 0 989 nom jésus christ 266.646564 722.353436 -16.329316 9.921130
5216398 243 792944 1 902 petit petit gamin 243.459906 659.540094 -15.539112 9.441029
6443467 0 746776 0 784 seigneur jésus christ 211.376043 572.623957 -14.538777 8.833260
5211473 0 748113 0 777 petit gamin oboxadsq 209.488756 567.511244 -14.473726 8.793737
4722339 0 887305 0 700 noovo télé web 188.728609 511.271391 -13.737853 8.346646
1557869 0 2254948 0 688 com timeline photo 185.493262 502.506738 -13.619591 8.274794
5725966 0 456224 0 667 publié chris yapi 179.831404 487.168596 -13.410123 8.147529
3072989 15161 466521 25 811 gagner carte cadeau 225.395882 610.604118 -13.347988 8.109777
204288 0 278222 0 657 53 14 43 177.135281 479.864719 -13.309218 8.086222
48770 0 278222 0 657 14 43 34 177.135281 479.864719 -13.309218 8.086222
1210360 0 261497 0 625 canada' import' post 168.507687 456.492313 -12.981051 7.886839
3023451 0 261497 0 625 from canada' import' 168.507687 456.492313 -12.981051 7.886839
7157583 0 796804 0 624 télé web rattrapage 168.238075 455.761925 -12.970662 7.880527
5243101 0 261029 0 623 photo from canada' 167.968462 455.031538 -12.960265 7.874210
2121759 0 917439 0 593 dimanche jeudi 18h30 159.880093 433.119907 -12.644370 7.682283
3712787 0 911412 0 584 jeudi 18h30 noovo 157.453583 426.546417 -12.548051 7.623763
132298 0 240149 0 573 21 26 91 154.487847 418.512153 -12.429314 7.551623
4828985 0 865352 0 573 od dimanche jeudi 154.487847 418.512153 -12.429314 7.551623
153439 0 240262 0 572 26 91 26 154.218235 417.781765 -12.418463 7.545030
2175149 0 594592 0 572 district 31' post 154.218235 417.781765 -12.418463 7.545030
3023612 0 594592 0 572 from district 31' 154.218235 417.781765 -12.418463 7.545030
5243147 0 593254 0 570 photo from district 153.679010 416.320990 -12.396734 7.531828
153412 0 240616 0 567 26 53 14 152.870174 414.129826 -12.364068 7.511981
234508 0 240616 0 567 91 26 53 152.870174 414.129826 -12.364068 7.511981
2228717 0 234344 0 562 douane 21 26 151.522112 410.477888 -12.309432 7.478786
4410959 0 236700 0 561 million douane 21 151.252500 409.747500 -12.298476 7.472130
4106587 0 476356 0 560 made in alsace 150.982888 409.017112 -12.287509 7.465467
5243418 0 354865 0 557 photo from spotted 150.174051 406.825949 -12.254552 7.445443
7192477 0 436043 0 538 université médecine énergétique 145.051417 392.948583 -12.043729 7.317355
6768122 0 285066 0 522 super facile faire 140.737620 381.262380 -11.863289 7.207725
77268 0 820453 0 521 18h30 noovo télé 140.468008 380.531992 -11.851920 7.200818
4936570 449 238703 2 495 oui avoir lire 133.997313 363.002687 -11.402945 6.928037
176214 0 199772 0 482 34 photo from 129.953128 352.046872 -11.399699 6.926064
189712 0 199772 0 482 43 34 photo 129.953128 352.046872 -11.399699 6.926064
6456106 444 303716 2 478 semaine julie lundi 129.413904 350.586096 -11.200215 6.804864
4078225 0 289803 0 460 lundi jeudi 21h 124.021658 335.978342 -11.136501 6.766154
1443817 0 318506 0 457 chri yapi mentir 123.212821 333.787179 -11.100127 6.744055
4015354 0 278161 0 450 livraison gratuit canada 121.325535 328.674465 -11.014787 6.692205
3813769 0 285512 0 449 julie lundi jeudi 121.055922 327.944078 -11.002542 6.684765
3712927 0 279621 0 444 jeudi 21h noovo 119.707861 324.292139 -10.941109 6.647441
3116271 13556 226235 15 531 geneviève o' gleman 147.208315 398.791685 -10.896640 6.620423
6138160 0 266607 0 436 robert savoie présent 117.550962 318.449038 -10.842092 6.587281
6392506 0 266607 0 436 savoie présent engager 117.550962 318.449038 -10.842092 6.587281
1857462 0 113737 0 429 covid-19 détail intérieur 115.663676 313.336324 -10.754705 6.534188
6288566 1471 1204625 3 438 révérend paul mukendi 118.899024 322.100976 -10.628957 6.457788
6410107 0 175214 0 418 savourer geneviève o' 112.697941 305.302059 -10.615929 6.449872
4713144 0 185064 0 418 nom seigneur jésus 112.697941 305.302059 -10.615929 6.449872
5240264 0 104639 0 412 photo ariel tarr 111.080267 300.919733 -10.539462 6.403414
1578337 0 209142 0 410 commentaire inutile envoyer 110.541043 299.458957 -10.513850 6.387853
In [85]:
fig = px.bar(graph_nonmedia3, x="res_nonmedia", y="trigramme", labels={"res_nonmedia":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="Olive", opacity=0.75, textfont_size=12)
fig
In [86]:
khi2_3["exp_inter_media"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_media.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3["exp_inter_nonmedia"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_nonmedia.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3
Out[86]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 838 0 1 #6 canada prouver 0.269612 0.730388 -0.519242 0.315474 184.414231 653.585769
1 325 0 1 0 #achatlocal bâton roue 0.269612 0.730388 1.406642 -0.854627 71.521032 253.478968
2 0 1217 0 1 #achatlocal timeline photo 0.269612 0.730388 -0.519242 0.315474 267.818758 949.181242
3 0 120 0 1 #arteta propos sortie 0.269612 0.730388 -0.519242 0.315474 26.407766 93.592234
4 0 145 0 1 #assnat covid19qc aîné 0.269612 0.730388 -0.519242 0.315474 31.909384 113.090616
... ... ... ... ... ... ... ... ... ... ... ...
7811358 0 425 0 1 concours soirée anniversaire 0.269612 0.730388 -0.519242 0.315474 93.527504 331.472496
7811359 0 2311 0 1 et 〰 500 0.269612 0.730388 -0.519242 0.315474 508.569557 1802.430443
7811360 0 151 0 1 les trips boostes 0.269612 0.730388 -0.519242 0.315474 33.229772 117.770228
7811361 0 181 0 1 nouvelle collection découvrir 0.269612 0.730388 -0.519242 0.315474 39.831713 141.168287
7811362 0 151 0 1 trips boostes dernière 0.269612 0.730388 -0.519242 0.315474 33.229772 117.770228

7811363 rows × 11 columns

In [87]:
khi2_3["res_inter_media"] = (khi2_3.interactions_media - khi2_3.exp_inter_media) / np.sqrt(khi2_3.exp_inter_media)
khi2_3["res_inter_nonmedia"] = (khi2_3.interactions_nonmedia - khi2_3.exp_inter_nonmedia) / np.sqrt(khi2_3.exp_inter_nonmedia)
khi2_3
Out[87]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 838 0 1 #6 canada prouver 0.269612 0.730388 -0.519242 0.315474 184.414231 653.585769 -13.579920 7.213452
1 325 0 1 0 #achatlocal bâton roue 0.269612 0.730388 1.406642 -0.854627 71.521032 253.478968 29.972643 -15.921023
2 0 1217 0 1 #achatlocal timeline photo 0.269612 0.730388 -0.519242 0.315474 267.818758 949.181242 -16.365169 8.692935
3 0 120 0 1 #arteta propos sortie 0.269612 0.730388 -0.519242 0.315474 26.407766 93.592234 -5.138849 2.729680
4 0 145 0 1 #assnat covid19qc aîné 0.269612 0.730388 -0.519242 0.315474 31.909384 113.090616 -5.648839 3.000579
... ... ... ... ... ... ... ... ... ... ... ... ... ...
7811358 0 425 0 1 concours soirée anniversaire 0.269612 0.730388 -0.519242 0.315474 93.527504 331.472496 -9.670962 5.137071
7811359 0 2311 0 1 et 〰 500 0.269612 0.730388 -0.519242 0.315474 508.569557 1802.430443 -22.551487 11.979015
7811360 0 151 0 1 les trips boostes 0.269612 0.730388 -0.519242 0.315474 33.229772 117.770228 -5.764527 3.062031
7811361 0 181 0 1 nouvelle collection découvrir 0.269612 0.730388 -0.519242 0.315474 39.831713 141.168287 -6.311237 3.352435
7811362 0 151 0 1 trips boostes dernière 0.269612 0.730388 -0.519242 0.315474 33.229772 117.770228 -5.764527 3.062031

7811363 rows × 13 columns

In [88]:
graph_interactions_media3 = khi2_3.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media3
Out[88]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
2913620 1006708 0 8 0 floyd lancer appel 2.156898 5.843102 3.978585 -2.417251 221540.909186 7.851671e+05 1668.150084 -886.096547
3898354 1008259 2299 32 4 lancer appel calme 9.706043 26.293957 7.155930 -4.347696 222388.158339 7.881698e+05 1666.461736 -885.199722
4422778 1074338 404734 1543 544 ministre françois legault 562.680868 1524.319132 41.327240 -25.109006 325491.558259 1.153580e+06 1312.571305 -697.218382
4747575 762701 142711 1711 354 nouveau cas covid-19 556.749398 1508.250602 48.918173 -29.720996 199249.233808 7.061628e+05 1262.287887 -670.508576
1184474 499326 0 2 0 calme vote frère 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
5952994 499326 0 2 0 recueillir endroit frère 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
7142717 499326 0 2 0 tuer terrence floyd 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
6929371 499326 0 2 0 terrence floyd lancer 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
641178 499326 0 2 0 appel calme vote 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
7450723 499326 0 2 0 vote frère george 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
2486117 499326 0 2 0 endroit frère tuer 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
3035332 499326 0 2 0 frère tuer terrence 0.539225 1.460775 1.989292 -1.208625 109884.033921 3.894420e+05 1174.831147 -624.052855
520249 532595 33884 789 79 américain donald trump 234.023476 633.976524 36.278131 -22.041342 124662.039732 4.418170e+05 1155.371586 -613.716225
3121539 507382 17174 6 2 george floyd lancer 2.156898 5.843102 2.616779 -1.589865 115436.266682 4.091197e+05 1153.598824 -612.774561
5683214 505967 31699 734 77 président américain donald 218.655575 592.344425 34.851165 -21.174367 118321.315097 4.193447e+05 1126.946917 -598.617464
3033790 516124 39579 18 5 frère george floyd 6.201083 16.798917 4.738145 -2.878734 122290.622363 4.334124e+05 1126.201088 -598.221290
2143409 596692 194146 611 217 directeur national santé 223.238984 604.761016 25.952496 -15.767842 174035.539142 6.168025e+05 1013.137850 -538.163778
1450323 364716 5278 446 5 chronique richard martineau 121.595147 329.404853 29.419089 -17.874024 81422.624190 2.885714e+05 992.804809 -527.363168
5814523 640012 262507 597 228 québec françois legault 222.430147 602.569853 25.115163 -15.259107 198612.586587 7.039064e+05 990.440448 -526.107255
4421086 447543 70379 204 103 ministre canada justin 82.770976 224.229024 13.325011 -8.095817 113976.357362 4.039456e+05 988.041593 -524.833019
2241329 713152 354377 698 436 dr horacio arruda 305.740347 828.259653 22.433512 -13.629828 234925.465222 8.326035e+05 986.662499 -524.100464
5387571 776737 436686 719 609 port masque obligatoire 358.045133 969.954867 19.075855 -11.589832 267031.586763 9.463914e+05 986.365623 -523.942768
4664088 665165 300397 804 397 national santé public 323.804371 877.195629 26.685592 -16.213246 212486.126420 7.530759e+05 982.030411 -521.639968
3915591 644110 283356 435 313 laurent duvernay tardif 201.670000 546.330000 16.430468 -9.982586 204102.541034 7.233635e+05 973.948141 -517.346786
3980204 538049 170950 418 175 lieu public fermer 159.880093 433.119907 20.413821 -12.402734 156025.662925 5.529733e+05 967.145634 -513.733396
1203870 459322 95601 221 138 canada justin trudeau 96.790815 262.209185 12.625153 -7.670608 122118.971885 4.328040e+05 964.939081 -512.561308
4983282 353843 15397 307 21 pandémie covid-19 québec 88.432834 239.567166 23.242249 -14.121189 81256.695395 2.879833e+05 956.256618 -507.949312
6306395 616246 292096 1458 579 saguenay lac saint 549.200253 1487.799747 38.779556 -23.561121 199894.023423 7.084480e+05 931.238078 -494.659835
3878545 612612 313349 1464 607 lac saint jean 558.367071 1512.632929 38.325895 -23.285493 203771.343638 7.221897e+05 905.696151 -481.092343
4558536 454712 141134 381 214 mort george floyd 160.419318 434.580682 17.415628 -10.581135 131124.680220 4.647213e+05 893.612580 -474.673729
5499294 327837 31901 92 64 presse ministre canada 42.059519 113.940481 7.700529 -4.678576 79165.640473 2.805724e+05 883.806933 -469.465115
1265346 336551 46578 657 116 cas confirmer covid-19 208.410307 564.589693 31.073466 -18.879167 84313.174223 2.988158e+05 868.685344 -461.432752
5346178 297029 16771 246 8 point pandémie covid-19 68.481524 185.518476 21.451460 -13.033168 69056.307591 2.447437e+05 867.523308 -460.815496
5684355 289570 16027 435 42 président donald trump 128.605067 348.394933 27.017959 -16.415181 67251.116734 2.383459e+05 857.287719 -455.378503
2306169 296252 28397 419 55 déclarer positif covid-19 127.796230 346.203770 25.759532 -15.650604 71443.789685 2.532052e+05 841.065709 -446.761612
7184563 260160 6314 936 23 union avenir érable 258.558195 700.441805 42.130134 -25.596817 58641.524886 2.078325e+05 832.170355 -442.036532
4768506 260160 6314 936 23 nouvelle union avenir 258.558195 700.441805 42.130134 -25.596817 58641.524886 2.078325e+05 832.170355 -442.036532
4722538 309740 45125 96 32 norah romy carpentier 34.510374 93.489626 10.467115 -6.359459 78093.265116 2.767717e+05 828.932930 -440.316860
4423520 522251 276344 678 307 ministre justin trudeau 265.568115 719.431885 25.308375 -15.376496 175742.581137 6.228524e+05 826.561724 -439.057311
2034682 269925 15519 421 34 dernier 24 heure 122.673596 332.326404 26.934948 -16.364746 62816.152531 2.226278e+05 826.348010 -438.943789
4424969 463512 207419 501 146 ministre québec françois 174.439158 472.560842 24.725331 -15.022259 147648.239352 5.232828e+05 822.026211 -436.648113
5878446 239597 2775 6 11 ramasser canette vide 4.583409 12.416591 0.661683 -0.402016 53337.525123 1.890345e+05 806.495492 -428.398426
2588768 229115 972 103 4 envoyer nous image 28.848516 78.151484 13.805690 -8.387861 50634.030098 1.794530e+05 793.178018 -421.324382
1311275 349674 104971 321 158 centre ville montréal 129.144291 349.855709 16.882514 -10.257234 100051.322386 3.545937e+05 789.173731 -419.197364
3028832 340793 101752 316 115 frontière canado américain 116.202901 314.797099 18.534492 -11.260919 97388.539334 3.451565e+05 779.964088 -414.305339
2364701 229711 7919 324 19 démocrate joe biden 92.477019 250.522981 24.075604 -14.627507 52293.978244 1.853360e+05 775.835135 -412.112100
6370412 331046 109797 401 146 santé public canada 147.477928 399.522072 20.876239 -12.683683 97013.989189 3.438290e+05 751.377321 -399.120473
7689041 255347 42021 70 61 émission spécial coronavirus 35.319211 95.680789 5.835573 -3.545493 65440.204193 2.319278e+05 742.366188 -394.333893
5502805 505800 339271 547 231 prestation canadien urgence 209.758369 568.241631 23.285284 -14.147336 185970.308835 6.591007e+05 741.646536 -393.951624
3009464 295452 81212 295 139 françois legault annoncer 117.011738 316.988262 16.454195 -9.997002 82890.455840 2.937735e+05 738.299259 -392.173600
In [89]:
fig = px.bar(graph_interactions_media3, x="res_inter_media", y="trigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="green", opacity=0.75, textfont_size=12)
fig
In [90]:
graph_interactions_nonmedia3 = khi2_3.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia3
Out[90]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
1557869 0 2254948 0 688 com timeline photo 185.493262 502.506738 -13.619591 8.274794 496234.489135 1.758714e+06 -704.439131 374.187603
4711113 0 1798241 0 989 nom jésus christ 266.646564 722.353436 -16.329316 9.921130 395729.393306 1.402512e+06 -629.070261 334.152779
3134740 0 1517950 0 123 giving an update 33.162313 89.837687 -5.758673 3.498771 334047.234252 1.183903e+06 -577.968195 307.008120
3415310 0 1421670 0 115 i giving an 31.005414 83.994586 -5.568251 3.383077 312859.403484 1.108811e+06 -559.338362 297.112230
7568817 0 1397459 0 1435 wwwuniversty energy energy 386.893649 1048.106351 -19.669612 11.950578 307531.416667 1.089928e+06 -554.555152 294.571460
7143918 0 1326669 0 112 tune in now 30.196578 81.803422 -5.495141 3.338658 291953.035487 1.034716e+06 -540.326786 287.013563
549591 0 1236585 0 110 an update the 29.657353 80.342647 -5.445856 3.308715 272128.725694 9.644563e+05 -521.659588 277.097825
3501643 0 1200438 0 97 in now for 26.152393 70.847607 -5.113941 3.107054 264174.046438 9.362640e+05 -513.978644 273.017821
6288566 1471 1204625 3 438 révérend paul mukendi 118.899024 322.100976 -10.628957 6.457788 265419.172596 9.406768e+05 -512.333217 272.143795
7540532 0 1186889 0 108 we re doing 29.118128 78.881872 -5.396122 3.278497 261192.389614 9.256966e+05 -511.069848 271.472712
6612969 0 1121124 0 339 solution com timeline 91.398569 247.601431 -9.560260 5.808484 246719.833627 8.744042e+05 -496.709003 263.844445
1683269 1035 1110029 3 159 conférence presse covid-19 43.677192 118.322808 -6.154937 3.739528 244505.982593 8.665580e+05 -492.382335 261.546183
302319 0 1007281 0 190 across the country 51.226337 138.773663 -7.157258 4.348503 221667.006268 7.856140e+05 -470.815257 250.090071
2949428 0 977150 0 85 for the latest 22.917045 62.082955 -4.787175 2.908523 215036.236338 7.621138e+05 -463.719998 246.321175
5346675 32303 1203742 46 369 point presse direct 111.889104 303.110896 -6.229020 3.784538 272009.890748 9.640351e+05 -459.608626 244.137275
7540592 0 934522 0 78 we re taking 21.029759 56.970241 -4.585822 2.786187 205655.317664 7.288667e+05 -453.492357 240.888404
2121759 0 917439 0 593 dimanche jeudi 18h30 159.880093 433.119907 -12.644370 7.682283 201895.952136 7.155430e+05 -449.328334 238.676537
5498120 0 914799 0 116 presse direct conférence 31.275027 84.724973 -5.592408 3.397754 201314.981288 7.134840e+05 -448.681381 238.332885
3712787 0 911412 0 584 jeudi 18h30 noovo 157.453583 426.546417 -12.548051 7.623763 200569.622098 7.108424e+05 -447.850000 237.891268
4722339 0 887305 0 700 noovo télé web 188.728609 511.271391 -13.737853 8.346646 195264.522012 6.920405e+05 -441.887454 234.724052
4828985 0 865352 0 573 od dimanche jeudi 154.487847 418.512153 -12.429314 7.551623 190433.441322 6.749186e+05 -436.386802 231.802188
6955097 0 843897 0 84 the work we 22.647433 61.352567 -4.758932 2.891363 185711.952860 6.581850e+05 -430.943097 228.910572
7564311 0 841281 0 79 work we re 21.299372 57.700628 -4.615124 2.803990 185136.263565 6.561447e+05 -430.274637 228.555496
2139381 15628 953514 14 122 direct conférence presse 36.667273 99.332727 -3.743346 2.274328 213273.958100 7.558680e+05 -427.975613 227.334289
77268 0 820453 0 521 18h30 noovo télé 140.468008 380.531992 -11.851920 7.200818 180552.755679 6.399002e+05 -424.914998 225.708536
5243188 0 815571 0 164 photo from françois 44.216417 119.783583 -6.649543 4.040033 179478.399740 6.360926e+05 -423.648911 225.036009
7157583 0 796804 0 624 télé web rattrapage 168.238075 455.761925 -12.970662 7.880527 175348.445233 6.214556e+05 -418.746278 222.431803
3242645 0 791848 0 4 guinéen incroyable talent 1.078449 2.921551 -1.038484 0.630947 174257.804505 6.175902e+05 -417.441977 221.738978
5216398 243 792944 1 902 petit petit gamin 243.459906 659.540094 -15.539112 9.441029 174552.471158 6.186345e+05 -417.213146 221.617426
3023726 0 783992 0 97 from françois legault' 26.152393 70.847607 -5.113941 3.107054 172528.976103 6.114630e+05 -415.366075 220.636289
3010800 0 783992 0 97 françois legault' post 26.152393 70.847607 -5.113941 3.107054 172528.976103 6.114630e+05 -415.366075 220.636289
5914088 0 773019 0 64 re taking to 17.255187 46.744813 -4.153936 2.523789 170114.205984 6.029048e+05 -412.449034 219.086801
4711083 0 767091 0 311 nom jésus amen 83.849425 227.150575 -9.156933 5.563437 168809.662354 5.982813e+05 -410.864530 218.245136
5211473 0 748113 0 777 petit gamin oboxadsq 209.488756 567.511244 -14.473726 8.793737 164633.274191 5.834797e+05 -405.750261 215.528512
6443467 0 746776 0 784 seigneur jésus christ 211.376043 572.623957 -14.538777 8.833260 164339.047667 5.824370e+05 -405.387528 215.335833
6720478 0 732900 0 61 steps we re 16.446350 44.553650 -4.055410 2.463927 161285.429680 5.716146e+05 -401.603573 213.325853
6068098 0 728364 0 65 restart our economy 17.524799 47.475201 -4.186263 2.543429 160287.216133 5.680768e+05 -400.358859 212.664679
3850334 0 724651 0 63 keep you safe 16.985575 46.014425 -4.121356 2.503994 159470.115846 5.651809e+05 -399.337096 212.121934
1828662 83984 1297761 141 1059 courir chance gagner 323.534759 876.465241 -10.148109 6.165641 304073.319735 1.077672e+06 -399.125808 212.009701
4712693 0 722483 0 254 nom puissant jésus 68.481524 185.518476 -8.275356 5.027821 158993.015543 5.634900e+05 -398.739283 211.804384
6953645 0 713239 0 62 the steps we 16.715963 45.284037 -4.088516 2.484041 156958.737317 5.562803e+05 -396.180183 210.445029
3734123 0 677115 0 82 john colem morvan 22.108209 59.891791 -4.701937 2.856735 149009.119550 5.281059e+05 -386.016994 205.046494
4844865 0 676760 0 136 official boris jardel 36.667273 99.332727 -6.055351 3.679022 148930.996576 5.278290e+05 -385.915789 204.992735
6988844 0 659117 0 89 to make sure 23.995495 65.004505 -4.898520 2.976172 145048.394808 5.140686e+05 -380.852195 202.303029
6614844 0 652069 0 196 solution timeline photo 52.844011 143.155989 -7.269389 4.416630 143497.378696 5.085716e+05 -378.810479 201.218500
1295900 0 645866 0 169 ce message résonne 45.564479 123.435521 -6.750147 4.101156 142132.317268 5.037337e+05 -377.004400 200.259137
5913790 0 638545 0 63 re doing to 16.985575 46.014425 -4.121356 2.503994 140521.223489 4.980238e+05 -374.861606 199.120917
3836685 0 635542 0 313 jésus christ amen 84.388650 228.611350 -9.186329 5.581297 139860.369150 4.956816e+05 -373.979103 198.652144
2633539 0 606443 0 343 ethnies cote ivoire 92.477019 250.522981 -9.616497 5.842652 133456.706006 4.729863e+05 -365.317268 194.051106
2141440 0 604932 0 48 direct radio pou 12.941390 35.058610 -3.597414 2.185665 133124.188221 4.718078e+05 -364.861876 193.809209
In [91]:
fig = px.bar(graph_interactions_nonmedia3, x="res_inter_nonmedia", y="trigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightgreen", opacity=0.75, textfont_size=12)
fig

In English

In [92]:
pays = "French Canada"
In [93]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "media"
source = graph_interactions_media1[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [94]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "non-media"
source = graph_interactions_nonmedia1[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [95]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "media"
source = graph_interactions_media2[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [96]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "non-media"
source = graph_interactions_nonmedia2[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [97]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "media"
source = graph_interactions_media3[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [98]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "non-media"
source = graph_interactions_nonmedia3[:10]
xx = "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [ ]: