0%

How to get the Alexa rank with Python XML parsing and save it to CSV using pandas

How to get alexa rank

xml demo

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Imported here so the demo runs on its own.
from xml.etree import ElementTree

# Sample XML document used to demonstrate ElementTree parsing.
content = """
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
<RLS PREFIX="http://" more="0"> </RLS>
<SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
<SD>
<POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
<REACH RANK="2141483"/>
<RANK DELTA="-1406486"/>
</SD>
</ALEXA>
"""

root = ElementTree.fromstring(content)

# Every element exposes .tag (its name) and .attrib (a dict of its attributes).
# Print the attributes of the root <ALEXA> element.
for name, value in root.attrib.items():
    print('{0}="{1}"'.format(name, value))

# Print the tag and attributes of each direct child of the root.
for child in root:
    print(child.tag, child.attrib)

pandas demo

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def demo_create_dataframe_1():
    """Build a DataFrame from a list of ``[name, age]`` rows and print it."""
    # Each inner list is one row; the column labels are supplied separately.
    rows = [['tom', 10], ['nick', 15], ['juli', 14]]
    header = ['Name', 'Age']

    frame = pd.DataFrame(rows, columns=header)
    print(frame)

def demo_create_dataframe_2():
    """Build a DataFrame from a dict mapping column name to values and print it."""
    # Each key becomes a column; the lists hold that column's values.
    columns = {
        'Name': ['Tom', 'nick', 'krish', 'jack'],
        'Age': [20, 21, 19, 18],
    }

    frame = pd.DataFrame(columns)
    print(frame)

def demo_append_dataframe():
    """Stack one DataFrame under another and print the result.

    Prints the first frame, a blank line, then the combined frame whose
    index is renumbered from 0.
    """
    # First DataFrame, built from a dictionary.
    df1 = pd.DataFrame({"a": [1, 2, 3, 4],
                        "b": [5, 6, 7, 8]})

    # Second DataFrame, built from a dictionary.
    df2 = pd.DataFrame({"a": [1, 2, 3],
                        "b": [5, 6, 7]})

    print(df1, "\n")

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. ignore_index=True renumbers the rows 0..n-1 instead of
    # keeping each frame's original index.
    new_df = pd.concat([df1, df2], ignore_index=True)
    print(new_df)

def demo_pandas_index(csv_path='data.csv'):
    """Demonstrate how the ``index`` flag of ``to_csv`` affects a CSV round trip.

    Args:
        csv_path: File used for the round trip. It is created or overwritten.
    """
    data = {'Name': ['Tom', 'nick', 'krish', 'jack'], 'Age': [20, 21, 19, 18]}
    df = pd.DataFrame(data)

    # Case 1: write without the index, so reading back yields only the
    # original columns.
    df.to_csv(csv_path, index=False)
    df = pd.read_csv(csv_path)

    # Case 2: write with the index (the default). The index is stored in a
    # header-less first column, which read_csv labels "Unnamed: 0"; drop it
    # to recover the original columns.
    df.to_csv(csv_path, index=True)
    df_new = pd.read_csv(csv_path).drop(['Unnamed: 0'], axis=1)

get rank and save to csv

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os
import datetime
import pandas as pd
import requests
from xml.etree import ElementTree

def get_alexa_rank(site="kezunlin.me", timeout=10):
    """Fetch the Alexa popularity rank for *site*.

    Args:
        site: Host name to look up.
        timeout: Seconds to wait for the HTTP response before requests
            raises a timeout error.

    Returns:
        The rank as an int, or 0 when the response status is not 200 or
        the response carries no POPULARITY element.
    """
    url = "http://data.alexa.com/data?cli=10&dat=snbamz&url={}".format(site)
    # Without a timeout, requests.get can block indefinitely.
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return 0

    root = ElementTree.fromstring(r.content)
    # Look the element up by tag rather than by position (root[2][0]), so a
    # response without popularity data yields 0 instead of an IndexError.
    popularity = root.find(".//POPULARITY")
    if popularity is None:
        return 0
    return int(popularity.attrib["TEXT"])

"""
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
<RLS PREFIX="http://" more="0"> </RLS>
<SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
<SD>
<POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
<REACH RANK="2141483"/>
<RANK DELTA="-1406486"/>
</SD>
</ALEXA>
"""

def save_to_csv(date_str, rank, csv_filepath='rank.csv'):
    """Append one ``Date, Rank`` row to a CSV file, creating it if needed.

    Args:
        date_str: Value written to the ``Date`` column.
        rank: Value written to the ``Rank`` column.
        csv_filepath: CSV file to update. It is rewritten in full.
    """
    new_row = pd.DataFrame({'Date': [date_str], 'Rank': [rank]})

    try:
        existing = pd.read_csv(csv_filepath)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (no file yet) or a zero-byte file: start a fresh table.
        combined = new_row
    else:
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # ignore_index=True renumbers the rows 0..n-1.
        combined = pd.concat([existing, new_row], ignore_index=True)

    combined.to_csv(csv_filepath, index=False, sep=',', encoding='utf-8')
    print("Appending to {}".format(csv_filepath))

def main():
    """Fetch the Alexa rank for kezunlin.me, print it, and log it to CSV."""
    current_rank = get_alexa_rank("kezunlin.me")
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    print("date = {} , rank = {}".format(today, current_rank))

    # No path is passed, so save_to_csv writes to its default file.
    save_to_csv(today, current_rank)


if __name__ == "__main__":
    main()

output

date = 2019-12-16 , rank = 2486318
Appending to rank.csv

view results

1
2
3
4
$ cat rank.csv 
Date,Rank
2019-12-13,2489312
2019-12-16,2486318

Reference

History

  • 2019/12/16: created.