How to get the Alexa rank of a website
xml demo
# Imported here so this snippet runs on its own.
from xml.etree import ElementTree

# Sample XML document used to demonstrate ElementTree parsing.
content = """
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
<RLS PREFIX="http://" more="0"> </RLS>
<SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
<SD>
<POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
<REACH RANK="2141483"/>
<RANK DELTA="-1406486"/>
</SD>
</ALEXA>
"""
root = ElementTree.fromstring(content)

# Every element exposes its name as `.tag` and its attributes as the
# `.attrib` dictionary.
# Print the attributes of the root <ALEXA> element as name="value" pairs.
for name, value in root.attrib.items():
    print('{0}="{1}"'.format(name, value))

# Print the tag and attributes of each direct child of the root.
for child in root:
    print(child.tag, child.attrib)
pandas demo
def demo_create_dataframe_1():
    """Build a DataFrame from a list of rows and print it.

    Each inner list is one row; the column labels are supplied separately
    through the ``columns`` argument.
    """
    # One inner list per row: [name, age].
    data = [['tom', 10], ['nick', 15], ['juli', 14]]
    # Create the DataFrame, naming the two columns explicitly.
    df = pd.DataFrame(data, columns=['Name', 'Age'])
    print(df)
def demo_create_dataframe_2():
    """Build a DataFrame from a dict of columns and print it.

    Each dictionary key becomes a column label and its list supplies that
    column's values.
    """
    # Initialise the data as a mapping of column name -> column values.
    data = {'Name': ['Tom', 'nick', 'krish', 'jack'], 'Age': [20, 21, 19, 18]}
    # Create the DataFrame; column labels come from the dict keys.
    df = pd.DataFrame(data)
    print(df)
def demo_append_dataframe():
    """Append the rows of one DataFrame to another and print the result.

    Prints the first frame, then the combined frame whose index has been
    renumbered from 0.
    """
    # First DataFrame, built from a dictionary of columns.
    df1 = pd.DataFrame({"a": [1, 2, 3, 4],
                        "b": [5, 6, 7, 8]})
    # Second DataFrame with the same columns.
    df2 = pd.DataFrame({"a": [1, 2, 3],
                        "b": [5, 6, 7]})
    print(df1, "\n")
    # Stack df2 below df1. DataFrame.append was removed in pandas 2.0, so
    # pd.concat is used; ignore_index=True renumbers the index 0..n-1.
    new_df = pd.concat([df1, df2], ignore_index=True)
    print(new_df)
def demo_pandas_index():
    """Show how the DataFrame index behaves when round-tripping through CSV.

    Writes ``data.csv`` in the current working directory twice: once without
    the index column and once with it, reading the file back each time.
    """
    data = {'Name': ['Tom', 'nick', 'krish', 'jack'], 'Age': [20, 21, 19, 18]}
    df = pd.DataFrame(data)

    # Case 1: write without the index, so reading back yields only the
    # data columns.
    df.to_csv('data.csv', index=False)
    df = pd.read_csv('data.csv')

    # Case 2: write with the index (the default). The index is stored as an
    # unlabeled first column, which read_csv names 'Unnamed: 0'; drop it to
    # recover the original columns.
    df.to_csv('data.csv', index=True)
    df_new = pd.read_csv('data.csv').drop(['Unnamed: 0'], axis=1)
Get the rank and save it to CSV
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os
import datetime
import pandas as pd
import requests
from xml.etree import ElementTree
def get_alexa_rank(site="kezunlin.me"):
    """Fetch the Alexa rank of ``site`` from the data.alexa.com XML endpoint.

    Args:
        site: Domain name to look up.

    Returns:
        The rank as an int, taken from the ``TEXT`` attribute of the
        ``<POPULARITY>`` element. Returns 0 when the HTTP status is not 200
        or the response carries no popularity data.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the request exceeds the timeout.
        xml.etree.ElementTree.ParseError: if the response body is not
            well-formed XML.
    """
    url = "http://data.alexa.com/data?cli=10&dat=snbamz&url={}".format(site)
    # Without a timeout, a stalled connection would block the script forever.
    r = requests.get(url, timeout=10)
    rank = 0
    if r.status_code == 200:
        root = ElementTree.fromstring(r.content)
        # Look the element up by tag rather than by position, so a response
        # with a different child layout (or without rank data) does not
        # raise IndexError/KeyError.
        popularity = root.find(".//POPULARITY")
        if popularity is not None:
            rank = int(popularity.attrib.get("TEXT", 0))
    return rank
"""
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
<RLS PREFIX="http://" more="0"> </RLS>
<SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
<SD>
<POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
<REACH RANK="2141483"/>
<RANK DELTA="-1406486"/>
</SD>
</ALEXA>
"""
def save_to_csv(date_str, rank, csv_filepath='rank.csv'):
    """Append one ``(Date, Rank)`` row to a CSV file, creating it if needed.

    Args:
        date_str: Date label stored in the ``Date`` column.
        rank: Rank value stored in the ``Rank`` column.
        csv_filepath: Path of the CSV file to update.

    The whole file is rewritten without an index column, comma-separated
    and UTF-8 encoded.
    """
    # The new row as a one-line DataFrame.
    new_row = pd.DataFrame({'Date': [date_str], 'Rank': [rank]})

    frames = [new_row]
    # On the first run the file does not exist yet (or may be empty), and
    # read_csv would raise; in that case the new row is the entire content.
    if os.path.isfile(csv_filepath) and os.path.getsize(csv_filepath) > 0:
        existing = pd.read_csv(csv_filepath)
        # Skip a header-only file so concat does not mix in an empty frame.
        if not existing.empty:
            frames.insert(0, existing)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement. ignore_index=True renumbers the rows 0..n-1.
    df = pd.concat(frames, ignore_index=True)

    # Save the combined frame back to the same file.
    df.to_csv(csv_filepath, index=False, sep=',', encoding='utf-8')
    print("Appending to {}".format(csv_filepath))
def main():
    """Look up today's rank for kezunlin.me and append it to the CSV log."""
    # Get the current rank.
    rank = get_alexa_rank("kezunlin.me")
    # Label the record with today's local date as YYYY-MM-DD.
    date_str = datetime.datetime.now().strftime('%Y-%m-%d')
    print("date = {} , rank = {}".format(date_str, rank))
    save_to_csv(date_str, rank)


if __name__ == "__main__":
    main()
output
date = 2019-12-16 , rank = 2486318
Appending to rank.csv
view results
$ cat rank.csv
Date,Rank
2019-12-13,2489312
2019-12-16,2486318
Reference
History
- 2019/12/16: created.