Information Management Database Project Report

Instructor: Dr. Karl Ho

Min Shi

May 12, 2022

Database Topic: US-China Trade Database

Database Introduction

The database aims to serve people interested in the US-China trade war, its effect on US-China trade volumes, and its impact on US multinational corporations (MNCs) that depend heavily on global value chains (GVCs).

The database provides two main types of data. The first is macro-level data: US-China monthly trade data by commodity, plus the volume and percentage of products subject to tariffs between the US and China. It also contains data on US annual trade with all countries and basic development indicators for those countries, including GDP, population, overall tariff rate, and tariff rate for manufactured products. The second is micro-level data on MNCs: the S&P 500 company list with details such as stock symbol, headquarters location, sector, and industry; S&P 500 stock-price time series; the Fortune 500 company list with annual revenues; Fortune 500 stock-price time series; and the top 20 companies ranked by level of sales in China and by share of sales in China.

People can use this database to explore how U.S.-China trade relations have changed in the 21st century, the connection between U.S.-China trade and tariff changes, how trends in US trade differ across countries, and how U.S.-China trade relations affect U.S. multinational corporations.

Background Introduction

Trade between the U.S. and China dates to 1971, when the U.S. officially ended its trade embargo on China. In 1979, when the two sides officially re-established diplomatic relations and signed a bilateral trade agreement, U.S.-China trade volume began to grow. In 2001, China's entry into the World Trade Organization (WTO) shifted China's economy to an export-driven pattern, contributing to the rapid development of its economy and international trade (Wang 2013). The U.S. has been the largest destination for China's exports since 2000 (Shen et al. 2020). During the first decade of the twenty-first century, both the U.S. and China attached great importance to their economic cooperation, and their trade relations expanded rapidly. From China's side, bilateral trade with the U.S. and foreign investment from the U.S. were crucial for China's continued development and modernization. From the U.S. perspective, China's rapid development brought an enormous potential market and economic activity, even as the government began to raise concerns about national security (Wang 2013).

In 2010, China became the second-largest economy after the U.S. in terms of GDP. Since the Obama administration, the U.S. has treated China as its biggest challenger and a strategic competitor. After Donald Trump took office in 2017, the U.S. government focused on the trade imbalance between the U.S. and China. In January 2018, President Trump began imposing tariffs and other trade barriers on China to force it to change what the U.S. calls "unfair trade practices" and theft of intellectual property (Wikimedia Foundation, 2021), signaling the coming outbreak of the U.S.-China trade war.

U.S.-China relations have worsened in the past few years, starting with the U.S.-China trade war. The tariffs imposed by both sides impede normal GVC circulation, increase manufacturing costs, and further reduce the profit margins of U.S. MNCs. Moreover, worsened U.S.-China relations bring growing anti-U.S. sentiment in China and raise the risk of nationalistic reactions and boycotts, which affect the sales of U.S. MNC products in China, home to the world's largest markets for retail, chemicals, chips, and other industries (Kapadia, 2021).

Thus, using macro-level and micro-level data to explore the U.S.-China trade status and the revenues of MNCs over time is necessary to evaluate the effect of the U.S.-China trade war on the economy.

US-China Trade Database Schema

This database includes thirteen tables; the relations among them are shown in the ER diagram below, and a sketch of one table's definition follows the diagram.

Database ER Diagram
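As a concrete illustration of the schema, below is a hypothetical DDL sketch for one of the thirteen tables. The column names are inferred from the queries later in this report, not copied from the actual schema, so names and types are assumptions.

# Hypothetical DDL sketch for the sp500withgics table; column names are
# inferred from the queries in this report, so names and types are assumptions
ddl_sp500withgics = """
CREATE TABLE IF NOT EXISTS sp500withgics (
    symbol         text PRIMARY KEY,
    companyname    text,
    hqlocation     text,
    sector         text,
    sectorid       integer,
    industry       text,
    datefirstadded date
);
"""
# Could be executed once through the ipython-sql magic used below, e.g.:
# %sql $ddl_sp500withgics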

Database Implementation

The database implementation is mainly through an R Shiny app and Python.

As for the Shiny app, I inner join the fortune500stockprice table and the sp500withgics table on the Symbol column to generate a combined table of daily time-series stock data for the public companies on the Fortune 500 list between 2017-01-01 and 2021-09-09, with 423,753 rows and seven columns: symbol, company name, headquarters location, sector, industry, date, and adjusted close.
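A minimal pandas sketch of that join (the Shiny app performs it in R; the two tiny frames below are placeholder rows, not the real tables):

import pandas as pd

# Placeholder rows standing in for the two database tables; column names
# follow the schema used elsewhere in this report
fortune500stockprice = pd.DataFrame({
    'symbol': ['AAPL', 'AAPL'],
    'date': pd.to_datetime(['2017-01-03', '2017-01-04']),
    'adjustedclose': [27.26, 27.23],
})
sp500withgics = pd.DataFrame({
    'symbol': ['AAPL'],
    'companyname': ['Apple Inc.'],
    'hqlocation': ['Cupertino, California'],
    'sector': ['Information Technology'],
    'industry': ['Technology Hardware, Storage & Peripherals'],
})
# Inner join on the stock symbol, then keep the report's date window
combined = fortune500stockprice.merge(sp500withgics, on='symbol', how='inner')
combined = combined[combined['date'].between('2017-01-01', '2021-09-09')]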

The results can be accessed through the Shiny app link, and the screenshot below shows what the Shiny app looks like.

R Shiny App Example

As for the Python implementation, I connect to the PostgreSQL database in two steps. First, I install the ipython-sql package, which introduces the %sql (or %%sql) magic to the notebook and allows it to connect to a database. Second, I install the psycopg2 package, the most popular PostgreSQL database adapter for the Python programming language.

Using SQL "Magic" to Connect to PostgreSQL

In [1]:
#!pip install ipython-sql  
In [2]:
#!pip install psycopg2  
In [3]:
#!pip install nbconvert
In [4]:
#!brew install pandoc
In [5]:
%load_ext sql
In [6]:
%sql postgresql://postgres:******@127.0.0.1:5433/6354Database
In [7]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import warnings
warnings.filterwarnings('ignore')

After connecting to PostgreSQL, I can extract data from the database using SQL commands. I then use the pandas and NumPy packages to process the extracted data and the matplotlib and seaborn packages for statistical visualization; finally, the statsmodels package is used for time-series analysis.

The following sections show some data analysis examples.

Data Analysis 1 -- Exploring Annual Country GDP and Tariffs and US Trade with China Compared to Other Countries

In [8]:
%%sql result_set1_1 << 
select * from countryDevelopmentIndicator cdi
where cdi.year >= '2014-01-01'
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
1519 rows affected.
Returning data to local variable result_set1_1
In [9]:
df1_1 = result_set1_1.DataFrame()
df1_1['year'] = df1_1['year'].astype('datetime64')
df1_1.head()
Out[9]:
countryname countrycode year gdp population tariffall tariffmanufactured
0 Barbados BRB 2017-01-01 4978000000.00 286229.00 None None
1 Aruba ABW 2015-01-01 2962905028.00 104339.00 10.02 11.09
2 Afghanistan AFG 2015-01-01 19134211764.00 34413603.00 None None
3 Albania ALB 2015-01-01 11386850130.00 2880703.00 3.58 0.66
4 Algeria DZA 2015-01-01 165979279263.00 39728020.00 12.02 7.66
In [10]:
%%sql result_set1_2 << 
select * from UStradebyCountryYear ust
where ust.year >= '2014-01-01'
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
1627 rows affected.
Returning data to local variable result_set1_2
In [11]:
df1_2 = result_set1_2.DataFrame()
df1_2['year'] = df1_2['year'].astype('datetime64')
df1_2.head()
Out[11]:
countryname year imports exports balance
0 Afghanistan 2015-01-01 23601251 478711524 455110273
1 Afghanistan 2016-01-01 33715926 913874351 880158425
2 Afghanistan 2017-01-01 14423622 942232031 927808409
3 Afghanistan 2018-01-01 29408728 1227404432 1197995704
4 Afghanistan 2019-01-01 38744074 757769159 719025085
In [12]:
df1 = pd.merge(df1_1, df1_2, left_on = ['countryname', 'year'], right_on = ['countryname', 'year'], how = 'left')
df1['gdp'] = df1['gdp']/1000000
df1['population'] = df1['population']/1000000
df1['imports'] = df1['imports']/1000000
df1['exports'] = df1['exports']/1000000
df1['balance'] = df1['balance']/1000000
In [13]:
# Keep a sample of large economies via a vectorized membership test
countries1 = ['China', 'United States', 'Japan', 'United Kingdom', 'France', 'Germany', 'Canada', 'Mexico']
mask1 = df1['countryname'].isin(countries1)
In [14]:
df1_3 = df1[mask1]
df1_3.head()
Out[14]:
countryname countrycode year gdp population tariffall tariffmanufactured imports exports balance
35 Canada CAN 2015-01-01 1556508.816217 35.702908 3.11 1.12 296000 280855 -15145
41 China CHN 2015-01-01 11061553.079872 1379.86 5.58 5.70 483000 115873 -367127
67 France FRA 2015-01-01 2439188.643163 66.548272 3.09 2.26 47808.780079 30026.310133 -17782.469946
99 Japan JPN 2015-01-01 4444930.651964 127.141 2.83 1.47 131000 62387.809646 -68612.190354
108 Germany DEU 2015-01-01 3357585.719352 81.686611 3.09 2.26 125000 49978.833642 -75021.166358
In [15]:
plt.figure(figsize = (10, 6))
sns.lineplot(x="year", y = 'gdp', hue="countryname", palette = 'bright', data = df1_3)
plt.ylim(1000000, 22000000)
plt.xlabel('year')
plt.ylabel('GDP in Million')
plt.title('Time-Series Plot for GDP in Million by Country', fontsize = 16)
plt.show()
# plt.savefig('Figure1.png')
In [16]:
plt.figure(figsize = (10, 6))
sns.lineplot(x="year", y = 'tariffall', hue="countryname", palette = 'bright', data = df1_3)
plt.ylim(0, 18)
plt.xlabel('year')
plt.ylabel('Tariff Rate')
plt.title('Time-Series Plot for Tariffs Applied by Country', fontsize = 16)
plt.show()
# plt.savefig('Figure2.png')

I select a sample of large countries and plot their annual GDP and tariff rates from 2015 to 2020. From the annual GDP plot, we can see that China's GDP growth rate slowed starting in 2018. As for the U.S., its GDP growth rate declined beginning in 2019, and its total GDP in 2020 was even lower than in 2019. Although the other countries' GDP growth also slowed somewhat, the U.S. and China are the two countries that suffered the most. As for the average tariff rates of these countries, the U.S. rate was the lowest in 2015-2016 and remained very low before 2018, but it rose sharply and became the highest among these countries during 2018-2020. In contrast to the U.S. increase, China's average tariff on imported products decreased during the trade war.

In [17]:
# The same sample of large economies, excluding the United States
countries2 = ['China', 'Japan', 'United Kingdom', 'France', 'Germany', 'Canada', 'Mexico']
mask2 = df1['countryname'].isin(countries2)
In [18]:
df1_4 = df1[mask2]
df1_4.head()
Out[18]:
countryname countrycode year gdp population tariffall tariffmanufactured imports exports balance
35 Canada CAN 2015-01-01 1556508.816217 35.702908 3.11 1.12 296000 280855 -15145
41 China CHN 2015-01-01 11061553.079872 1379.86 5.58 5.70 483000 115873 -367127
67 France FRA 2015-01-01 2439188.643163 66.548272 3.09 2.26 47808.780079 30026.310133 -17782.469946
99 Japan JPN 2015-01-01 4444930.651964 127.141 2.83 1.47 131000 62387.809646 -68612.190354
108 Germany DEU 2015-01-01 3357585.719352 81.686611 3.09 2.26 125000 49978.833642 -75021.166358
In [19]:
plt.figure(figsize = (10, 6))
sns.lineplot(x="year", y = 'imports', hue="countryname", palette = 'bright', data = df1_4)
plt.xlabel('year')
plt.ylabel('Imports in Million')
plt.title('Time-Series Plot for U.S. Imports by Country', fontsize = 16)
plt.show()
# plt.savefig('Figure3.png')
In [20]:
plt.figure(figsize = (10, 6))
sns.lineplot(x="year", y = 'exports', hue="countryname", palette = 'bright', data = df1_4)
plt.xlabel('year')
plt.ylabel('Exports in Million')
plt.title('Time-Series Plot for U.S. Exports by Country', fontsize = 16)
plt.show()
# plt.savefig('Figure4.png')
In [21]:
plt.figure(figsize = (10, 6))
sns.lineplot(x="year", y = 'balance', hue="countryname", palette = 'bright', data = df1_4)
plt.xlabel('year')
plt.ylabel('Trade Balance in Million')
plt.title('Time-Series Plot for U.S. Trade Balance by Country', fontsize = 16)
plt.show()
# plt.savefig('Figure5.png')

The time-series plot of U.S. imports by country above shows that U.S. imports from China fell sharply from 2018 to 2019 compared with imports from the other countries, but the volume began to increase after the Phase One agreement was signed at the beginning of 2020. As for U.S. exports, exports to China decreased in the first year of the trade war and began increasing in 2019. The trade balance plot shows that the U.S. trade deficit with China peaked at the end of 2018 but narrowed between 2018 and 2020, the first two years of the trade war. Overall, the trade war helped the U.S. reduce its trade deficit with China, but it hurt China's exports to the U.S. and its economy heavily.

Data Analysis 2 -- Exploring US-China Trade by Commodity and Sectors

In [22]:
%%sql result_set2 << 
select tc.htsnumber, tc.month, tc.imports, tc.exports, tc.balance, tc.sector, cn.description
from USChinatradebyCommoditySectorMonthly tc
inner join commoditybyhtsnumber cn
on tc.htsnumber = cn.htsnumber
where tc.month >= '2017-01-01';
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
6014 rows affected.
Returning data to local variable result_set2
In [23]:
df2 = result_set2.DataFrame()
df2['month'] = df2['month'].astype('datetime64').dt.year  # collapse months to years for plotting
In [24]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="month", y = 'imports', hue="sector", palette = 'bright', data = df2)
plt.xlabel('Year')
plt.ylabel('US imports from China')
plt.title('US Imports from China by Sectors', fontsize = 16)
plt.show()
# plt.savefig('Figure6.png')
In [25]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="month", y = 'exports', hue="sector", palette = 'bright', data = df2)
plt.xlabel('Year')
plt.ylabel('US Exports to China')
plt.title('US Exports to China by Sectors', fontsize = 16)
plt.show()
# plt.savefig('Figure7.png')
In [26]:
plt.figure(figsize = (10,8))
sns.lineplot(x="month", y = 'balance', hue="sector", palette = 'bright', data = df2)
plt.xlabel('Year')
plt.ylabel('US Trade Balance with China')
plt.title('US Trade Balance with China by Sectors', fontsize = 16)
plt.show()
# plt.savefig('Figure8.png')

From the plots of US trade with China by sector, industrial imports from China decreased substantially, and consumer discretionary imports from China also decreased to some degree compared with other sectors during the trade war. Exports of industrial products and consumer staples to China also decreased substantially, since China imposed its tariffs mainly on U.S. industrial and agricultural products.

As for the trade balance, the U.S. runs a deficit with China in every sector, with the industrial and health care sectors showing the largest deficits. During the trade war, the deficit in the industrial sector narrowed slightly from 2018 to 2020 but rebounded after 2020, while the deficit in health care narrowed substantially before rising again after 2020.

Data Analysis 3 -- Exploring US-China Trade Tariff Data during the Trade War Period

In [27]:
%%sql result_set3 << 
select trc.TariffAction, trc.Date, trc.ChineseTariffsOnUSExports, trc.ChineseTariffsOnROWExports, trc.USTariffsOnChineseExports, 
trc.USTariffsOnROWExports, tc.ChineseExportsSubjectToUSTariffs, tc.USExportsSubjectToChineseTariffs,
trc.TradeWar, trc.PhaseOneAgreement
from USChinaTariffRateChange trc
inner join USChinaTradeTariffCoverage tc
on trc.date = tc.date;
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
38 rows affected.
Returning data to local variable result_set3
In [28]:
df3 = result_set3.DataFrame()
df3.head()
Out[28]:
tariffaction date chinesetariffsonusexports chinesetariffsonrowexports ustariffsonchineseexports ustariffsonrowexports chineseexportssubjecttoustariffs usexportssubjecttochinesetariffs tradewar phaseoneagreement
0 1-Jan-18 2018-01-01 8.0 8.0 3.1 2.2 0.0 0.0 NaN NaN
1 US Section 201 tariffs on solar panels and was... 2018-02-07 8.0 8.0 3.2 2.2 0.2 0.0 NaN NaN
2 US Section 232 tariffs on steel and aluminum, ... 2018-03-23 8.0 8.0 3.8 2.5 0.8 0.0 NaN NaN
3 China's retaliation to US Section 232 tariffs 2018-04-02 8.0 8.4 3.8 2.5 0.8 1.9 NaN NaN
4 China's MFN tariff cut on pharmaceuticals 2018-05-01 8.0 8.3 3.8 2.5 0.8 1.9 NaN NaN
In [29]:
plt.figure(figsize = (10, 6))
sns.set_style("white")
sns.lineplot(data=df3, x="date", y="chinesetariffsonusexports")
sns.lineplot(data=df3, x="date", y="chinesetariffsonrowexports")
sns.lineplot(data=df3, x="date", y="ustariffsonchineseexports")
sns.lineplot(data=df3, x="date", y="ustariffsonrowexports")

color_map = ['orange']
plt.stackplot(df3.date, df3.tradewar,colors=color_map)
color_map = ['blue']
plt.stackplot(df3.date, df3.phaseoneagreement,colors=color_map)
plt.legend(labels=["Chinese Tariffs on US Exports", "Chinese Tariffs on Row Exports", 
                   "US Tariffs on Chinese Exports", "US Tariffs on Row Exports", 
                   'Trade War', 'Phase One Agreement'])
plt.xlabel('Date')
plt.ylabel('Tariff Rate')
plt.title('U.S. and China Tariff on Each Other and on the other Countries during the Trade War Period',  weight = 'bold', fontsize = 16)
# plt.savefig('Figure.png')
plt.show()
# plt.savefig('Figure9.png')
In [30]:
plt.figure(figsize = (10, 6))
sns.set_style("white")
sns.lineplot(data=df3, x="date", y="chineseexportssubjecttoustariffs")
sns.lineplot(data=df3, x="date", y="usexportssubjecttochinesetariffs")
color_map = ['orange']
plt.stackplot(df3.date, df3.tradewar,colors=color_map)
color_map = ['blue']
plt.stackplot(df3.date, df3.phaseoneagreement,colors=color_map)
plt.legend(labels=["Chinese Exports Subject to US Tariffs", "US Exports Subject to Chinese Tariffs",
                  'Trade War', 'Phase One Agreement'])
plt.xlabel('Date')
plt.ylabel('Percentage Under Tariff')
plt.title('U.S. and China Exports to Each Other under Imposed Tariff during the Trade War Period',  weight = 'bold', fontsize = 16)
# plt.savefig('Figure.png')
plt.show()
# plt.savefig('Figure10.png')

From the plots above, we can see that the tariff rates of the U.S. and China on each other before the trade war were relatively low: roughly 3% on the U.S. side and 8% on China's side. During the trade war period (orange), the tariff rates increased in several phases. I divide the whole period into four phases, which the following paragraphs introduce in detail.

In the first stage, the U.S. announced a preliminary tariff list on April 3, 2018, covering US$50 billion of imports from China (List 1 & List 2); China announced a list of the same value the next day. The final lists were decided in June 2018 and implemented in July (List 1) and August (List 2) of that year (Wong & Koty, 2020).

In the second stage, the U.S. announced a tariff plan for another list of Chinese goods (List 3), worth US$200 billion, on September 17, 2018; China announced tariffs on US$60 billion worth of U.S. goods the next day. The tariffs on both sides came into effect on September 24, 2018. Later, in June 2019, the tariff rates on both sides increased to 25% (Mullen, 2021).

In the third stage, the U.S. announced tariffs on an additional US$300 billion worth of Chinese exports on August 13, 2019; China retaliated, announcing tariffs on US$75 billion worth of American goods on August 23, 2019 (Liang & Ding 2020: 41-44). The U.S. began implementing tariffs on more than US$125 billion worth of Chinese products (List 4A) on September 1, 2019, and China imposed retaliatory tariffs on a subset of its scheduled list.

The signing of the Phase One deal marked the trade war's next stage. Under the agreement, China agreed to purchase an additional $200 billion worth of U.S. products, and the U.S. and China each reduced some tariffs on the other side's exports. But the Phase One plan did not go smoothly: due to the pandemic, China met only 59% of its 2020 goal for purchases of U.S. products. After the Biden administration took office, several trade talks were held, but there was still no clear signal that all imposed tariffs would be lifted, and by the end of 2021 China had bought only 57% of the U.S. exports it had committed to purchase under the Phase One deal. In February 2022, the U.S. House of Representatives passed the America COMPETES Act, aiming to strengthen the U.S. competitive edge over China, and the U.S. Trade Representative (USTR) placed more emphasis on competing with China in its annual report (China Briefing Team, 2022).

Data Analysis 4 -- Exploring the S&P 500 Companies with GICS Classification

The Global Industry Classification Standard (GICS) is a four-tiered, hierarchical industry classification system developed in 1999 by MSCI and Standard & Poor's (S&P) Dow Jones Indices for use by the global financial community. The GICS structure consists of

  • 11 sectors
  • 24 industry groups
  • 69 industries
  • 158 sub-industries.

S&P has categorized all major public companies under GICS. GICS is used as a basis for S&P and MSCI financial market indexes, in which each company is assigned to a sub-industry, and to the corresponding industry, industry group, and sector, according to its principal business activity.
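For example, a single company's four-tier assignment looks like the following sketch; Apple's classification is taken from public GICS listings and is shown purely as an illustration, not drawn from this database.

# Apple's four GICS tiers, per public GICS listings (illustration only)
aapl_gics = {
    'sector': 'Information Technology',
    'industry_group': 'Technology Hardware & Equipment',
    'industry': 'Technology Hardware, Storage & Peripherals',
    'sub_industry': 'Technology Hardware, Storage & Peripherals',
}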

"GICS" is a registered trademark of McGraw Hill Financial and MSCI Inc.

In [31]:
%%sql result_set4 << 
select * from sp500withgics;
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
505 rows affected.
Returning data to local variable result_set4
In [32]:
df4 = result_set4.DataFrame()
df4['datefirstadded'] = pd.to_datetime(df4['datefirstadded'])
df4['yearfirstadded'] = df4['datefirstadded'].dt.year
In [33]:
# Keep the state (or country) that follows the city in 'City, State'
df4['hqlocation'] = df4['hqlocation'].apply(lambda x: re.split(r',', x)[1])
# Strip trailing footnote markers such as ';' or '['
df4['hqlocation'] = df4['hqlocation'].apply(lambda x: re.split(r'\;|\[', x)[0])
In [34]:
df4_1 = df4.groupby(['hqlocation']).count()
df4_1['Number of Companies in that location'] = df4_1['symbol']
df4_1 = df4_1[['Number of Companies in that location']].sort_values(by = 'Number of Companies in that location', ascending = False)
df4_1.head(10)
Out[34]:
Number of Companies in that location
hqlocation
California 78
New York 56
Texas 38
Illinois 34
Massachusetts 21
Ohio 19
Georgia 17
Pennsylvania 17
New Jersey 16
North Carolina 16
In [35]:
plt.figure(figsize = (20, 8))
sns.barplot(x = df4_1.index, y = 'Number of Companies in that location', data = df4_1)
plt.xticks(rotation=70)
plt.xlabel('Location')
plt.ylabel('Number of Companies')
plt.title('Bar plot for Number of Companies Headquartered in that location', fontsize = 16)
plt.show()
# plt.savefig('Figure11.png')
In [36]:
df4_2 = df4.groupby(['sector']).count()
In [37]:
df4_2['Number of Companies in the sector'] = df4_2['symbol']
df4_2 = df4_2[['Number of Companies in the sector']].sort_values(by = 'Number of Companies in the sector', ascending = False)
df4_2.head(11)
Out[37]:
Number of Companies in the sector
sector
Information Technology 74
Industrials 73
Financials 65
Health Care 63
Consumer Discretionary 61
Consumer Staples 32
Real Estate 30
Materials 28
Utilities 28
Communication Services 26
Energy 25
In [38]:
plt.figure(figsize = (10, 8))
sns.barplot(x = df4_2.index, y = 'Number of Companies in the sector', data = df4_2)
plt.xticks(rotation=70)
plt.xlabel('Sector')
plt.ylabel('Number of Companies')
plt.title('Bar plot for Number of Companies in the Sector', fontsize = 16)
plt.show()
# plt.savefig('Figure12.png')

From the bar plot of the number of companies headquartered in each state (or country), we can see that California is the headquarters location of nearly 80 of the S&P 500 companies, followed by New York, Texas, and Illinois.

As for the number of companies across the eleven sectors, information technology and industrials each contain more than 70 S&P 500 companies, followed by financials, health care, and consumer discretionary with approximately 60 companies each. The other six sectors contain approximately 30 S&P 500 companies each.

Combining this with the results from the analysis of U.S.-China trade by sector, I suppose that changes in companies' annual revenues and stock prices tend to follow similar trends within a sector but are more likely to differ across sectors. Next, I will first combine the Fortune 500 annual revenue data with the S&P 500 company list with GICS information to analyze revenue changes over time by sector. Second, I will combine the stock data with the S&P 500 company list with GICS information to analyze the stock time series by sector.

Data Analysis 5 -- Exploring Company Revenue Data by Sectors

In [39]:
%%sql result_set5 << 
select sp.symbol, sp.companyName, sp.sector, forta.year, forta.revenues
from sp500withgics sp
inner join fortune500Annual forta
on sp.symbol = forta.symbol
where forta.year >= '2017';
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
1433 rows affected.
Returning data to local variable result_set5
In [40]:
df5 = result_set5.DataFrame()
df5['year'] = df5['year'].astype('datetime64')
df5.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1433 entries, 0 to 1432
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   symbol       1433 non-null   object        
 1   companyname  1433 non-null   object        
 2   sector       1433 non-null   object        
 3   year         1433 non-null   datetime64[ns]
 4   revenues     1433 non-null   object        
dtypes: datetime64[ns](1), object(4)
memory usage: 56.1+ KB
In [41]:
df5['sector'].unique()
Out[41]:
array(['Consumer Staples', 'Financials', 'Information Technology',
       'Energy', 'Health Care', 'Consumer Discretionary',
       'Communication Services', 'Industrials', 'Utilities', 'Materials',
       'Real Estate'], dtype=object)
In [42]:
df5_1 = df5[df5['sector'] == 'Information Technology']
In [43]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 15))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_1)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Information Technology Companies', fontsize = 16)
# plt.show()
In [44]:
df5_2 = df5[df5['sector'] == 'Industrials']
In [45]:
plt.figure(figsize = (20, 15))
sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_2)
plt.xlabel('Year')
plt.ylabel('Revenues')
plt.title('Annual Revenue Data for Industrials Companies', fontsize = 16)
plt.show()
# plt.savefig('Figure13.png')
In [46]:
df5_3 = df5[df5['sector'] == 'Financials']
In [47]:
plt.figure(figsize = (20, 14))
sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_3)
plt.xlabel('Year')
plt.ylabel('Revenues')
plt.title('Annual Revenue Data for Financials Companies', fontsize = 16)
plt.show()
# plt.savefig('Figure14.png')
In [48]:
df5_4 = df5[df5['sector'] == 'Health Care']
In [49]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_4)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Health Care Companies', fontsize = 16)
# plt.show()
In [50]:
df5_5 = df5[df5['sector'] == 'Consumer Discretionary']
In [51]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_5)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Consumer Discretionary Companies', fontsize = 16)
# plt.show()
In [52]:
df5_6 = df5[df5['sector'] == 'Consumer Staples']
In [53]:
plt.figure(figsize = (20, 10))
sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_6)
plt.xlabel('Year')
plt.ylabel('Revenues')
plt.title('Annual Revenue Data for Consumer Staples Companies', fontsize = 16)
plt.show()
# plt.savefig('Figure15.png')
In [54]:
df5_7 = df5[df5['sector'] == 'Real Estate']
In [55]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_7)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Real Estate Companies', fontsize = 16)
# plt.show()
In [56]:
df5_8 = df5[df5['sector'] == 'Materials']
In [57]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_8)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Materials Companies', fontsize = 16)
# plt.show()
In [58]:
df5_9 = df5[df5['sector'] == 'Utilities']
In [59]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_9)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Utilities Companies', fontsize = 16)
# plt.show()
In [60]:
df5_10 = df5[df5['sector'] == 'Communication Services']
In [61]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_10)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Communication Services Companies', fontsize = 16)
# plt.show()
In [62]:
df5_11 = df5[df5['sector'] == 'Energy']
In [63]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="year", y = 'revenues', hue="symbol", palette = 'bright', data = df5_11)
# plt.xlabel('Year')
# plt.ylabel('Revenues')
# plt.title('Annual Revenue Data for Energy Companies', fontsize = 16)
# plt.show()

From the annual revenue plots by sector above, we can see that the annual revenues of companies in the same sector tend to follow similar trends. Among the sectors, consumer staples companies have the most stable revenues and were barely affected by the trade war, while industrial and energy companies suffered considerably during it. Companies in the remaining sectors maintained slightly increasing trends on average.

Data Analysis 6 -- Exploring the Stock Price of S&P 500 Companies by Sectors

In [64]:
%%sql result_set6 << 
select sp.symbol, sp.sector, sp.sectorid, s.date, s.adjustedclose
from sp500withgics sp
join sp500stockprice s
on sp.symbol = s.symbol;
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
661196 rows affected.
Returning data to local variable result_set6
In [65]:
df6 = result_set6.DataFrame()
df6['date'] = df6['date'].astype('datetime64')
df6['adjustedclose'] = df6['adjustedclose'].astype('float')
# Sort each company's series by date and compute the 30-day rolling average
# within each symbol, so the window never crosses between companies
df6 = df6.sort_values(['symbol', 'date'])
df6['30day_ave_close'] = df6.groupby('symbol')['adjustedclose'].transform(lambda s: s.rolling(30).mean())
df6 = df6[df6['date'] >= '2017-02-09']
In [66]:
df6['sector'].unique()
Out[66]:
array(['Information Technology', 'Financials', 'Industrials',
       'Health Care', 'Consumer Discretionary', 'Consumer Staples',
       'Materials', 'Communication Services', 'Utilities', 'Energy',
       'Real Estate'], dtype=object)
In [67]:
df6_1 = df6[df6['sector'] == 'Information Technology']
In [68]:
plt.figure(figsize = (20, 20))
sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_1)
plt.xlabel('Date')
plt.ylabel('30-day rolling average adjusted close stock price')
plt.title('Time-Series Stock Price Plot for Information Technology Companies', fontsize = 16)
plt.show()
# plt.savefig('Figure16.png')
In [69]:
df6_2 = df6[df6['sector'] == 'Industrials']
In [70]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 20))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_2)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Industrials Companies', fontsize = 16)
# plt.show()
In [71]:
df6_3 = df6[df6['sector'] == 'Financials']
In [72]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 18))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_3)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Financials Companies', fontsize = 16)
# plt.show()
In [73]:
df6_4 = df6[df6['sector'] == 'Health Care']
In [74]:
plt.figure(figsize = (20, 17))
sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_4)
plt.xlabel('Date')
plt.ylabel('30-day rolling average adjusted close stock price')
plt.title('Time-Series Stock Price Plot for Health Care Companies', fontsize = 16)
plt.show()
# plt.savefig('Figure17.png')
In [75]:
df6_5 = df6[df6['sector'] == 'Consumer Discretionary']
In [76]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 17))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_5)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Consumer Discretionary Companies', fontsize = 16)
# plt.show()
In [77]:
df6_6 = df6[df6['sector'] == 'Consumer Staples']
In [78]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_6)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Consumer Staples Companies', fontsize = 16)
# plt.show()
In [79]:
df6_7 = df6[df6['sector'] == 'Real Estate']
In [80]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_7)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Real Estate Companies', fontsize = 16)
# plt.show()
In [81]:
df6_8 = df6[df6['sector'] == 'Materials']
In [82]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_8)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Materials Companies', fontsize = 16)
# plt.show()
In [83]:
df6_9 = df6[df6['sector'] == 'Utilities']
In [84]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 12))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_9)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Utilities Companies', fontsize = 16)
# plt.show()
In [85]:
df6_10 = df6[df6['sector'] == 'Communication Services']
In [86]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_10)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Communication Services Companies', fontsize = 16)
# plt.show()
In [87]:
df6_11 = df6[df6['sector'] == 'Energy']
In [88]:
## Commented to save space in the final report
# plt.figure(figsize = (20, 10))
# sns.lineplot(x="date", y = '30day_ave_close', hue="symbol", palette = 'bright', data = df6_11)
# plt.xlabel('Date')
# plt.ylabel('30-day rolling average adjusted close stock price')
# plt.title('Time-Series Stock Price Plot for Energy Companies', fontsize = 16)
# plt.show()

From the time-series plots of companies' stock prices by sector, we can see that stock price trends within the same sector are very similar, while trends across different sectors tend to differ.

Data Analysis 7 -- Exploring the Stock Price for 15 Companies with the Highest Level of Sales in China

Data Extraction

In [89]:
%%sql result_set7 << 
select fort.symbol, t20level.companyname, fort.date, fort.adjustedclose, spg.sector, spg.industry
from fortune500stockprice fort
inner join TOP20Companies_LevelofSaleinChina t20level
on fort.symbol = t20level.symbol
inner join sp500withGICS spg
on t20level.symbol = spg.symbol
where date >= '2017-01-01';
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
20145 rows affected.
Returning data to local variable result_set7
In [90]:
df7 = result_set7.DataFrame()
In [91]:
df7['date'] = df7['date'].astype('datetime64')
df7['adjustedclose'] = df7['adjustedclose'].astype('float')
df7.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20145 entries, 0 to 20144
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   symbol         20145 non-null  object        
 1   companyname    20145 non-null  object        
 2   date           20145 non-null  datetime64[ns]
 3   adjustedclose  20145 non-null  float64       
 4   sector         20145 non-null  object        
 5   industry       20145 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 944.4+ KB
In [92]:
df7['symbol'].unique()
Out[92]:
array(['APH', 'INTC', 'MU', 'AVGO', 'TXN', 'GLW', 'WDC', 'AAPL', 'MMM',
       'BA', 'PG', 'AMAT', 'ABT', 'NKE', 'QCOM'], dtype=object)
In [93]:
df7.companyname.unique()
Out[93]:
array(['Amphenol Corp. Class A', 'Intel Corp.', 'Micron Technology Inc.',
       'Broadcom Ltd.', 'Texas Instruments Inc.', 'Corning Inc',
       'Western Digital Corp.', 'Apple Inc.', '3M co.', 'Boeing Co.',
       'Procter & Gamble co.', 'Applied Materials Inc.',
       'Abbott Laboratories', 'Nike Inc. Class B', 'Qualcomm Inc.'],
      dtype=object)
In [94]:
s1 = pd.Series(df7['symbol'].unique())
s2 = pd.Series(df7.companyname.unique())
company_list = zip(s1, s2)
list(company_list)
Out[94]:
[('APH', 'Amphenol Corp. Class A'),
 ('INTC', 'Intel Corp.'),
 ('MU', 'Micron Technology Inc.'),
 ('AVGO', 'Broadcom Ltd.'),
 ('TXN', 'Texas Instruments Inc.'),
 ('GLW', 'Corning Inc'),
 ('WDC', 'Western Digital Corp.'),
 ('AAPL', 'Apple Inc.'),
 ('MMM', '3M co.'),
 ('BA', 'Boeing Co.'),
 ('PG', 'Procter & Gamble co.'),
 ('AMAT', 'Applied Materials Inc.'),
 ('ABT', 'Abbott Laboratories'),
 ('NKE', 'Nike Inc. Class B'),
 ('QCOM', 'Qualcomm Inc.')]

Data Visualization

Plot for Stock Price of 15 Companies with the Highest Level of Sales in China by Company

In [95]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="date", y = 'adjustedclose', hue="symbol", palette = 'Dark2', data = df7)
plt.xlabel('Date')
plt.ylabel('Adjusted Close Stock Price')
plt.title('Time-Series Stock Price Plot for 15 Companies with Highest Level of Sale in China', fontsize = 16)
plt.show()
# plt.savefig('Figure18.png')

Plot for Stock Price of 15 Companies with the Highest Level of Sales in China by Sector

In [96]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="date", y = 'adjustedclose', hue="sector", palette = 'Paired', data = df7)
plt.xlabel('Date')
plt.ylabel('Adjusted Close Stock Price')
plt.title('Time-Series Stock Price Plot for 15 Companies with Highest Level of Sale in China', fontsize = 16)
plt.show()
# plt.savefig('Figure19.png')

The 15 companies with the highest level of sales in China are mainly information technology companies, with a few from other sectors, and the stock price fluctuations differ considerably across companies. After averaging the stock prices by sector, we can see that the industrial companies' stock prices fluctuate the most and trend downward during the trade war period, while the other companies' stock prices trend upward in general.
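Note that when sns.lineplot is given hue="sector", it aggregates the companies within each sector by taking the mean at each date (with a confidence band). An explicit equivalent of that averaging, for reference:

# Explicit version of the per-sector averaging that sns.lineplot performs
# implicitly when hue='sector'
sector_mean = df7.groupby(['sector', 'date'])['adjustedclose'].mean().reset_index()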

Data Analysis

In [97]:
# Amphenol Corp. Class A
APH = df7[df7.symbol == 'APH'].set_index(['date'])
# Intel Corp.
INTC = df7[df7.symbol == 'INTC'].set_index(['date'])
# Micron Technology Inc.
MU = df7[df7.symbol == 'MU'].set_index(['date'])
# Broadcom Ltd.
AVGO = df7[df7.symbol == 'AVGO'].set_index(['date'])
# Texas Instruments Inc.
TXN = df7[df7.symbol == 'TXN'].set_index(['date'])
# Corning Inc
GLW = df7[df7.symbol == 'GLW'].set_index(['date'])
# Western Digital Corp.
WDC = df7[df7.symbol == 'WDC'].set_index(['date'])
# Apple Inc.
AAPL = df7[df7.symbol == 'AAPL'].set_index(['date'])
# 3M co.
MMM = df7[df7.symbol == 'MMM'].set_index(['date'])
# Boeing Co.
BA = df7[df7.symbol == 'BA'].set_index(['date'])
# Procter & Gamble co.
PG = df7[df7.symbol == 'PG'].set_index(['date'])
# Applied Materials Inc.
AMAT = df7[df7.symbol == 'AMAT'].set_index(['date'])
# Abbott Laboratories
ABT = df7[df7.symbol == 'ABT'].set_index(['date'])
# Nike Inc. Class B
NKE = df7[df7.symbol == 'NKE'].set_index(['date'])
# Qualcomm Inc.
QCOM = df7[df7.symbol == 'QCOM'].set_index(['date'])

Time Series Plot for Stock Price and Daily Changes of Stock Price

In [98]:
import os
import sys

import pandas_datareader.data as web

import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
import statsmodels.tsa as smta
from arch import arch_model

import matplotlib as mpl
%matplotlib inline
p = print
In [99]:
def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    with plt.style.context(style):    
        fig = plt.figure(figsize=figsize)
        #mpl.rcParams['font.family'] = 'Ubuntu Mono'
        layout = (2, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        #qq_ax = plt.subplot2grid(layout, (2, 0))
        #pp_ax = plt.subplot2grid(layout, (2, 1))
        
        y.plot(ax=ts_ax)
        ts_ax.set_title('Time Series Analysis Plots')
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)
        #sm.qqplot(y, line='s', ax=qq_ax)
        #qq_ax.set_title('QQ Plot')        
        #scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

        plt.tight_layout()
    return 
In [100]:
tsplot(AAPL.adjustedclose)
# plt.savefig('Figure20.png')

In reality, time-series stock data is typically non-stationary; for example, the plot of the Apple stock data above is non-stationary, and the autocorrelation (ACF) and partial autocorrelation (PACF) plots confirm the strong autocorrelation.

Next, I run Augmented Dickey-Fuller (ADF) tests on each stock series to test for stationarity.

In [101]:
# Compute the ADF test for each company's stock series to detect stationarity.
# The null hypothesis of each test is that the series is non-stationary.
from statsmodels.tsa.stattools import adfuller

stocks = {'APH': APH, 'INTC': INTC, 'MU': MU, 'AVGO': AVGO, 'TXN': TXN,
          'GLW': GLW, 'WDC': WDC, 'AAPL': AAPL, 'MMM': MMM, 'BA': BA,
          'PG': PG, 'AMAT': AMAT, 'ABT': ABT, 'NKE': NKE, 'QCOM': QCOM}
for name, stock in stocks.items():
    adf = adfuller(stock.adjustedclose)
    print(f'The p-value for the ADF test on {name} is ', adf[1])
The p-value for the ADF test on APH is  0.7806927548320779
The p-value for the ADF test on INTC is  0.11617598297062354
The p-value for the ADF test on MU is  0.4547967670790441
The p-value for the ADF test on AVGO is  0.9594796854371349
The p-value for the ADF test on TXN is  0.8104674553114753
The p-value for the ADF test on GLW is  0.31689572938294996
The p-value for the ADF test on WDC is  0.2567812379035717
The p-value for the ADF test on AAPL is  0.9679901691668595
The p-value for the ADF test on MMM is  0.17393968117077635
The p-value for the ADF test on BA is  0.4124168407482042
The p-value for the ADF test on PG is  0.9440275764100345
The p-value for the ADF test on AMAT is  0.8344203686797362
The p-value for the ADF test on ABT is  0.6978377846716866
The p-value for the ADF test on NKE is  0.6579612426694753
The p-value for the ADF test on QCOM is  0.7662105013199096

***From the results of the ADF tests, all p-values are larger than 0.05, so we cannot reject the null hypotheses; we conclude that the stock series are non-stationary.***

Working with non-stationary data is difficult; to model it, we need to convert the non-stationary process into a stationary one.

First-differencing is often used for this conversion, and the Dickey-Fuller results above are consistent with the first difference being stationary white noise w(t).
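A one-line sketch of why differencing helps: if the price follows a random walk, which is consistent with the ADF results above, then

y(t) = y(t-1) + w(t)  =>  Δy(t) = y(t) - y(t-1) = w(t),

so the first difference is exactly the white-noise term and is stationary by construction.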

In [102]:
# Amphenol Corp. Class A
APH1d = np.diff(APH.adjustedclose)
# Intel Corp.
INTC1d = np.diff(INTC.adjustedclose)
# Micron Technology Inc.
MU1d = np.diff(MU.adjustedclose)
# Broadcom Ltd.
AVGO1d = np.diff(AVGO.adjustedclose)
# Texas Instruments Inc.
TXN1d = np.diff(TXN.adjustedclose)
# Corning Inc
GLW1d = np.diff(GLW.adjustedclose)
# Western Digital Corp.
WDC1d = np.diff(WDC.adjustedclose)
# Apple Inc.
AAPL1d = np.diff(AAPL.adjustedclose)
# 3M co.
MMM1d = np.diff(MMM.adjustedclose)
# Boeing Co.
BA1d = np.diff(BA.adjustedclose)
# Procter & Gamble co.
PG1d = np.diff(PG.adjustedclose)
# Applied Materials Inc.
AMAT1d = np.diff(AMAT.adjustedclose)
# Abbott Laboratories
ABT1d = np.diff(ABT.adjustedclose)
# Nike Inc. Class B
NKE1d = np.diff(NKE.adjustedclose)
# Qualcomm Inc.
QCOM1d = np.diff(QCOM.adjustedclose)
In [103]:
tsplot(AAPL1d)
# plt.savefig('Figure21.png')

Taking Apple's first-differenced stock data as an example, the ACF and PACF plots above show that the non-stationary series has been transformed into a difference-stationary one, with autocorrelations close to zero at all lags. The first-differenced data also shows how a company's daily stock price changes evolve along the timeline, which will be useful for exploring cointegration across companies through their first-differenced stock prices.
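As a numerical cross-check (not run in the original notebook), the same ADF test can be applied to the differenced series; a p-value near zero would confirm stationarity:

# ADF test on Apple's first-differenced prices (AAPL1d from the cell above);
# rejecting the unit-root null means the differenced series is stationary
print('ADF p-value for AAPL first differences:', adfuller(AAPL1d)[1])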

Next, I convert the stock price data into first differences for all companies and examine the correlations among the differenced series.

In [104]:
dic = {'APH1d': APH1d, 
       'INTC1d': INTC1d,
       'MU1d': MU1d,
       'AVGO1d': AVGO1d,
       'TXN1d': TXN1d,
       'GLW1d': GLW1d,
       'WDC1d': WDC1d,
       'AAPL1d': AAPL1d,
       'MMM1d': MMM1d,
       'BA1d': BA1d,
       'PG1d': PG1d,
       'AMAT1d': AMAT1d,
       'ABT1d': ABT1d,
       'NKE1d': NKE1d,
       'QCOM1d': QCOM1d}

diff_stock1 = pd.DataFrame(dic).set_index(APH.index[1:])
In [105]:
diff_stock1.corr()
Out[105]:
APH1d INTC1d MU1d AVGO1d TXN1d GLW1d WDC1d AAPL1d MMM1d BA1d PG1d AMAT1d ABT1d NKE1d QCOM1d
APH1d 1.000000 0.554443 0.586354 0.682525 0.695802 0.687701 0.515437 0.577050 0.529536 0.506577 0.403491 0.635977 0.484340 0.547721 0.593024
INTC1d 0.554443 1.000000 0.568658 0.537787 0.635236 0.491718 0.477131 0.462769 0.406278 0.404452 0.320036 0.536106 0.426510 0.341716 0.473530
MU1d 0.586354 0.568658 1.000000 0.610272 0.663791 0.477479 0.675265 0.452169 0.328463 0.385651 0.215110 0.707532 0.321977 0.407201 0.557458
AVGO1d 0.682525 0.537787 0.610272 1.000000 0.732737 0.515984 0.454706 0.626259 0.330900 0.364109 0.262114 0.693326 0.419737 0.419180 0.669334
TXN1d 0.695802 0.635236 0.663791 0.732737 1.000000 0.595286 0.503309 0.579147 0.405305 0.385474 0.340874 0.723983 0.450565 0.447785 0.641393
GLW1d 0.687701 0.491718 0.477479 0.515984 0.595286 1.000000 0.453816 0.421575 0.534043 0.442728 0.323734 0.486482 0.408717 0.424017 0.448999
WDC1d 0.515437 0.477131 0.675265 0.454706 0.503309 0.453816 1.000000 0.321376 0.371951 0.394987 0.209516 0.498751 0.262745 0.347887 0.371309
AAPL1d 0.577050 0.462769 0.452169 0.626259 0.579147 0.421575 0.321376 1.000000 0.290707 0.325890 0.339192 0.531770 0.430177 0.427189 0.579035
MMM1d 0.529536 0.406278 0.328463 0.330900 0.405305 0.534043 0.371951 0.290707 1.000000 0.420525 0.368972 0.263141 0.379677 0.359714 0.251334
BA1d 0.506577 0.404452 0.385651 0.364109 0.385474 0.442728 0.394987 0.325890 0.420525 1.000000 0.229176 0.343780 0.276824 0.369038 0.298859
PG1d 0.403491 0.320036 0.215110 0.262114 0.340874 0.323734 0.209516 0.339192 0.368972 0.229176 1.000000 0.214167 0.488965 0.324010 0.250163
AMAT1d 0.635977 0.536106 0.707532 0.693326 0.723983 0.486482 0.498751 0.531770 0.263141 0.343780 0.214167 1.000000 0.319375 0.379573 0.660180
ABT1d 0.484340 0.426510 0.321977 0.419737 0.450565 0.408717 0.262745 0.430177 0.379677 0.276824 0.488965 0.319375 1.000000 0.369688 0.343556
NKE1d 0.547721 0.341716 0.407201 0.419180 0.447785 0.424017 0.347887 0.427189 0.359714 0.369038 0.324010 0.379573 0.369688 1.000000 0.405742
QCOM1d 0.593024 0.473530 0.557458 0.669334 0.641393 0.448999 0.371309 0.579035 0.251334 0.298859 0.250163 0.660180 0.343556 0.405742 1.000000
In [106]:
# Use the ADF test on the spread between two first-differenced stock series to check for cointegration
from statsmodels.tsa.stattools import adfuller
adf1 = adfuller(MU1d - PG1d)
print('The p-value for the ADF test on the spread between MU and PG first-difference stock data is ', adf1[1])
The p-value for the ADF test on the spread between MU and PG first-difference stock data is  0.0

***Since the p-value is 0 for the ADF test, we can reject the null hypothesis and conclude that the first-differenced stock series of MU and PG are cointegrated with more than 99% confidence.***

Moreover, since the correlation coefficient between MU and PG is the smallest in the matrix and the two series are still cointegrated, we can conclude that the first-differenced stock prices of all 15 companies with the highest level of sales in China are cointegrated.
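As a cross-check, statsmodels also ships a built-in Engle-Granger cointegration test that works on the price levels directly, regressing one series on the other and running an ADF test on the residuals. The sketch below shows how it could be applied here; it is not part of the original analysis:

from statsmodels.tsa.stattools import coint

# Engle-Granger test on the raw price levels of MU and PG;
# a small p-value indicates the two series are cointegrated
t_stat, p_value, crit_values = coint(MU.adjustedclose, PG.adjustedclose)
print('Engle-Granger cointegration p-value for MU vs PG:', p_value)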

Data Analysis 8 -- Exploring the Stock Price for the Top 12 Companies with the Highest Share of Sales in China

Data Extraction

In [107]:
%%sql result_set8 << 
select fort.symbol, t20share.companyname, fort.date, fort.adjustedclose, spg.sector, spg.industry
from fortune500stockprice fort
inner join TOP20Companies_ShareofSaleinChina t20share
on fort.symbol = t20share.symbol
inner join SP500withGICS spg
on t20share.symbol = spg.symbol
where date >= '2017-01-01';
 * postgresql://postgres:***@127.0.0.1:5433/6354Database
16116 rows affected.
Returning data to local variable result_set8
In [108]:
df8 = result_set8.DataFrame()
df8['date'] = df8['date'].astype('datetime64')
df8['adjustedclose'] = df8['adjustedclose'].astype('float')
df8.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16116 entries, 0 to 16115
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   symbol         16116 non-null  object        
 1   companyname    16116 non-null  object        
 2   date           16116 non-null  datetime64[ns]
 3   adjustedclose  16116 non-null  float64       
 4   sector         16116 non-null  object        
 5   industry       16116 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 755.6+ KB
In [109]:
df8.symbol.unique()
Out[109]:
array(['APH', 'INTC', 'MU', 'AVGO', 'TXN', 'GLW', 'WDC', 'NVDA', 'AMD',
       'AAPL', 'AMAT', 'QCOM'], dtype=object)
In [110]:
df8.companyname.unique()
Out[110]:
array(['Amphenol Corp. Class A', 'Intel Corp.', 'Micron Technology Inc.',
       'Broadcom Ltd.', 'Texas Instruments Inc.', 'Corning Inc',
       'Western Digital Corp.', 'Nvidia Corp.',
       'Advanced Micro Devices Inc.', 'Apple Inc.',
       'Applied Materials Inc.', 'Qualcomm Inc.'], dtype=object)
In [111]:
s3 = pd.Series(df8['symbol'].unique())
s4 = pd.Series(df8.companyname.unique())
company_list2 = zip(s3, s4)
list(company_list2)
Out[111]:
[('APH', 'Amphenol Corp. Class A'),
 ('INTC', 'Intel Corp.'),
 ('MU', 'Micron Technology Inc.'),
 ('AVGO', 'Broadcom Ltd.'),
 ('TXN', 'Texas Instruments Inc.'),
 ('GLW', 'Corning Inc'),
 ('WDC', 'Western Digital Corp.'),
 ('NVDA', 'Nvidia Corp.'),
 ('AMD', 'Advanced Micro Devices Inc.'),
 ('AAPL', 'Apple Inc.'),
 ('AMAT', 'Applied Materials Inc.'),
 ('QCOM', 'Qualcomm Inc.')]

Data Visualization

Time-Series Stock Price Plot for 12 Companies with the Highest Share of Sales in China by Company

In [112]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="date", y = 'adjustedclose', hue="symbol", palette = 'Paired', data = df8)
plt.xlabel('Date')
plt.ylabel('Adjusted Close Stock Price')
plt.title('Time-Series Stock Price Plot for 12 Companies with Highest Share of Sale in China', fontsize = 16)
plt.show()

Time-Series Stock Price Plot for 12 Companies with the Highest Share of Sales in China by Sector

In [113]:
plt.figure(figsize = (10, 8))
sns.lineplot(x="date", y = 'adjustedclose', hue="sector", palette = 'bright', data = df8)
plt.xlabel('Date')
plt.ylabel('Adjusted Close Stock Price')
plt.title('Time-Series Stock Price Plot for 12 Companies with Highest Share of Sale in China', fontsize = 16)
plt.show()

The 12 companies with the highest share of sales in China are almost all information technology companies, and their stock price trends tend to be similar.

Data Analysis

In [114]:
# Amphenol Corp. Class A
APH = df8[df8.symbol == 'APH'].set_index(['date'])
# Intel Corp.
INTC = df8[df8.symbol == 'INTC'].set_index(['date'])
# Micron Technology Inc.
MU = df8[df8.symbol == 'MU'].set_index(['date'])
# Broadcom Ltd.
AVGO = df8[df8.symbol == 'AVGO'].set_index(['date'])
# Texas Instruments Inc.
TXN = df8[df8.symbol == 'TXN'].set_index(['date'])
# Corning Inc
GLW = df8[df8.symbol == 'GLW'].set_index(['date'])
# Western Digital Corp.
WDC = df8[df8.symbol == 'WDC'].set_index(['date'])
# Nvidia Corp.
NVDA = df8[df8.symbol == 'NVDA'].set_index(['date'])
# Advanced Micro Devices Inc.
AMD = df8[df8.symbol == 'AMD'].set_index(['date'])
# Apple Inc.
AAPL = df8[df8.symbol == 'AAPL'].set_index(['date'])
# Applied Materials Inc.
AMAT = df8[df8.symbol == 'AMAT'].set_index(['date'])
# Qualcomm Inc.
QCOM = df8[df8.symbol == 'QCOM'].set_index(['date'])
In [115]:
# Amphenol Corp. Class A
APH1d = np.diff(APH.adjustedclose)
# Intel Corp.
INTC1d = np.diff(INTC.adjustedclose)
# Micron Technology Inc.
MU1d = np.diff(MU.adjustedclose)
# Broadcom Ltd.
AVGO1d = np.diff(AVGO.adjustedclose)
# Texas Instruments Inc.
TXN1d = np.diff(TXN.adjustedclose)
# Corning Inc
GLW1d = np.diff(GLW.adjustedclose)
# Western Digital Corp.
WDC1d = np.diff(WDC.adjustedclose)
# Nvidia Corp.
NVDA1d = np.diff(NVDA.adjustedclose)
# Advanced Micro Devices Inc.
AMD1d = np.diff(AMD.adjustedclose)
# Apple Inc.
AAPL1d = np.diff(AAPL.adjustedclose)
# Applied Materials Inc.
AMAT1d = np.diff(AMAT.adjustedclose)
# Qualcomm Inc.
QCOM1d = np.diff(QCOM.adjustedclose)
In [116]:
dic2 = {'APH1d': APH1d, 
       'INTC1d': INTC1d,
       'MU1d': MU1d,
       'AVGO1d': AVGO1d,
       'TXN1d': TXN1d,
       'GLW1d': GLW1d,
       'WDC1d': WDC1d,
       'NVDA1d': NVDA1d,
       'AMD1d': AMD1d,
       'AAPL1d': AAPL1d,
       'AMAT1d': AMAT1d,
       'QCOM1d': QCOM1d}

diff_stock2 = pd.DataFrame(dic2).set_index(APH.index[1:])
In [117]:
diff_stock2.corr()
Out[117]:
APH1d INTC1d MU1d AVGO1d TXN1d GLW1d WDC1d NVDA1d AMD1d AAPL1d AMAT1d QCOM1d
APH1d 1.000000 0.554443 0.586354 0.682525 0.695802 0.687701 0.515437 0.512636 0.476918 0.577050 0.635977 0.593024
INTC1d 0.554443 1.000000 0.568658 0.537787 0.635236 0.491718 0.477131 0.385342 0.342456 0.462769 0.536106 0.473530
MU1d 0.586354 0.568658 1.000000 0.610272 0.663791 0.477479 0.675265 0.517104 0.487191 0.452169 0.707532 0.557458
AVGO1d 0.682525 0.537787 0.610272 1.000000 0.732737 0.515984 0.454706 0.624587 0.566220 0.626259 0.693326 0.669334
TXN1d 0.695802 0.635236 0.663791 0.732737 1.000000 0.595286 0.503309 0.572283 0.537589 0.579147 0.723983 0.641393
GLW1d 0.687701 0.491718 0.477479 0.515984 0.595286 1.000000 0.453816 0.328301 0.326096 0.421575 0.486482 0.448999
WDC1d 0.515437 0.477131 0.675265 0.454706 0.503309 0.453816 1.000000 0.291919 0.276868 0.321376 0.498751 0.371309
NVDA1d 0.512636 0.385342 0.517104 0.624587 0.572283 0.328301 0.291919 1.000000 0.750125 0.584230 0.641828 0.633838
AMD1d 0.476918 0.342456 0.487191 0.566220 0.537589 0.326096 0.276868 0.750125 1.000000 0.544282 0.586245 0.605426
AAPL1d 0.577050 0.462769 0.452169 0.626259 0.579147 0.421575 0.321376 0.584230 0.544282 1.000000 0.531770 0.579035
AMAT1d 0.635977 0.536106 0.707532 0.693326 0.723983 0.486482 0.498751 0.641828 0.586245 0.531770 1.000000 0.660180
QCOM1d 0.593024 0.473530 0.557458 0.669334 0.641393 0.448999 0.371309 0.633838 0.605426 0.579035 0.660180 1.000000
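
A heatmap makes this correlation structure easier to scan than the raw matrix; a short sketch using the same seaborn setup:

plt.figure(figsize=(10, 8))
sns.heatmap(diff_stock2.corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation of First-Difference Stock Prices', fontsize=16)
plt.show()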

***Similar to Data Analysis 7, the first-difference stock prices of the 12 companies from the list of the top 20 companies with the highest share of sales in China are also highly correlated with one another, which suggests that they are cointegrated.***

Data Analyses 7 and 8 show a similar phenomenon: the stock prices of the companies in the list of the top 20 companies with the highest level of sales in China and in the list of the top 20 companies with the highest share of sales in China are highly cointegrated. Since stock prices are sensitive to many company-specific issues, when the first-difference stock prices of multiple companies are highly correlated, we can suppose that some common factor affects all of them, such as the U.S.-China trade war.

To explore whether the U.S.-China trade war affects only the companies exposed to China or all types of MNCs, I extract the stock prices of another list of 12 companies randomly selected from the S&P 500 company list; a sketch of how such a draw could be made follows. These 12 companies cover multiple sectors.
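
A random draw of this kind can be made reproducibly from the S&P 500 company table. A minimal sketch, assuming the list is loaded as a DataFrame named sp500 with a symbol column (both names are hypothetical):

# Sample 12 tickers at random; fixing random_state makes the draw reproducible
sample_symbols = sp500['symbol'].sample(12, random_state=42).tolist()
print(sample_symbols)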

Data Analysis 9 -- Exploring the Stock Price for 12 Companies Randomly Selected from the S&P 500 Company List¶

In [118]:
# Load the stock price query result into a pandas DataFrame
all_stock = result_set6.DataFrame()
#all_stock['30day_ave_close'] = all_stock.adjustedclose.rolling(30).mean().shift(-3)
#all_stock = all_stock[all_stock['date'] >= '2017-02-09']
all_stock['date'] = pd.to_datetime(all_stock['date'])
all_stock['adjustedclose'] = all_stock['adjustedclose'].astype('float')
In [119]:
## Extract stock price time-series data for each company in the sample
# Google
GOOG = all_stock[all_stock.symbol == 'GOOG'].set_index(['date'])
# Facebook (Meta)
FB = all_stock[all_stock.symbol == 'FB'].set_index(['date'])
# JPMorgan Chase & Co
JPM = all_stock[all_stock.symbol == 'JPM'].set_index(['date'])
# Bank of America Corp
BAC = all_stock[all_stock.symbol == 'BAC'].set_index(['date'])
# Cincinnati Financial -- Financials company
CINF = all_stock[all_stock.symbol == 'CINF'].set_index(['date'])
# Discover Financial Services -- Financials company
DFS = all_stock[all_stock.symbol == 'DFS'].set_index(['date']) 
# Duke Realty Corp -- Real Estate
DRE = all_stock[all_stock.symbol == 'DRE'].set_index(['date'])
# Healthpeak Properties -- Real Estate
PEAK = all_stock[all_stock.symbol == 'PEAK'].set_index(['date'])
# Hormel Foods Corp. -- Consumer Staples
HRL = all_stock[all_stock.symbol == 'HRL'].set_index(['date'])
# Lamb Weston Holdings Inc -- Consumer Staples
LW = all_stock[all_stock.symbol == 'LW'].set_index(['date'])
# Las Vegas Sands -- Consumer Discretionary
LVS = all_stock[all_stock.symbol == 'LVS'].set_index(['date'])
# Marriott Int'l. -- Consumer Discretionary
MAR = all_stock[all_stock.symbol == 'MAR'].set_index(['date'])
In [120]:
adf_GOOG = adfuller(GOOG.adjustedclose)
print('The p-value for the ADF test on GOOG is ', adf_GOOG[1])
adf_FB = adfuller(FB.adjustedclose)
print('The p-value for the ADF test on FB is ', adf_FB[1])
adf_JPM = adfuller(JPM.adjustedclose)
print('The p-value for the ADF test on JPM is ', adf_JPM[1])
adf_BAC = adfuller(BAC.adjustedclose)
print('The p-value for the ADF test on BAC is ', adf_BAC[1])
adf_CINF = adfuller(CINF.adjustedclose)
print('The p-value for the ADF test on CINF is ', adf_CINF[1])
adf_DFS = adfuller(DFS.adjustedclose)
print('The p-value for the ADF test on DFS is ', adf_DFS[1])
adf_DRE = adfuller(DRE.adjustedclose)
print('The p-value for the ADF test on DRE is ', adf_DRE[1])
adf_PEAK = adfuller(PEAK.adjustedclose)
print('The p-value for the ADF test on PEAK is ', adf_PEAK[1])
adf_HRL = adfuller(HRL.adjustedclose)
print('The p-value for the ADF test on HRL is ', adf_HRL[1])
adf_LW = adfuller(LW.adjustedclose)
print('The p-value for the ADF test on LW is ', adf_LW[1])
adf_LVS = adfuller(LVS.adjustedclose)
print('The p-value for the ADF test on LVS is ', adf_LVS[1])
adf_MAR = adfuller(MAR.adjustedclose)
print('The p-value for the ADF test on MAR is ', adf_MAR[1])
The p-value for the ADF test on GOOG is  0.9206452216305152
The p-value for the ADF test on FB is  0.4408830489570079
The p-value for the ADF test on JPM is  0.5164123596562111
The p-value for the ADF test on BAC is  0.527076862960515
The p-value for the ADF test on CINF is  0.6181210041301123
The p-value for the ADF test on DFS is  0.8236884846098547
The p-value for the ADF test on DRE is  0.9630232739124333
The p-value for the ADF test on PEAK is  0.7101643261104784
The p-value for the ADF test on HRL is  0.7724075446506207
The p-value for the ADF test on LW is  0.07904250900005483
The p-value for the ADF test on LVS is  0.17728913574082866
The p-value for the ADF test on MAR is  0.29830008378976064

***From the results of the ADF tests above, we can see that all p-values are larger than 0.05, so we cannot reject the null hypothesis of a unit root; the stock price series for these companies are likewise non-stationary.***
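
For reference, the twelve ADF calls above can be condensed into a single loop over the per-symbol frames; a sketch under the same setup:

# Map each ticker to its per-symbol frame defined earlier
frames = {'GOOG': GOOG, 'FB': FB, 'JPM': JPM, 'BAC': BAC, 'CINF': CINF, 'DFS': DFS,
          'DRE': DRE, 'PEAK': PEAK, 'HRL': HRL, 'LW': LW, 'LVS': LVS, 'MAR': MAR}
for symbol, frame in frames.items():
    # adfuller returns (statistic, p-value, ...); index 1 is the p-value
    print('The p-value for the ADF test on', symbol, 'is', adfuller(frame.adjustedclose)[1])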

Next, I take the first difference of each stock price series.

In [121]:
# Google
GOOG1d = np.diff(GOOG.adjustedclose)
# Facebook (Meta)
FB1d = np.diff(FB.adjustedclose)
# JPMorgan Chase & Co
JPM1d = np.diff(JPM.adjustedclose)
# Bank of America Corp
BAC1d = np.diff(BAC.adjustedclose)
# Cincinnati Financial
CINF1d = np.diff(CINF.adjustedclose)
# Discover Financial Services
DFS1d = np.diff(DFS.adjustedclose)
# Duke Realty Corp
DRE1d = np.diff(DRE.adjustedclose)
# Healthpeak Properties
PEAK1d = np.diff(PEAK.adjustedclose)
# Hormel Foods Corp.
HRL1d = np.diff(HRL.adjustedclose)
# Lamb Weston Holdings Inc
LW1d = np.diff(LW.adjustedclose)
# Las Vegas Sands
LVS1d = np.diff(LVS.adjustedclose)
# Marriott Int'l.
MAR1d = np.diff(MAR.adjustedclose)
In [122]:
dic3 = {'GOOG1d': GOOG1d, 
       'FB1d': FB1d,
       'JPM1d': JPM1d,
       'BAC1d': BAC1d,
       'CINF1d': CINF1d,
       'DFS1d': DFS1d,
       'DRE1d': DRE1d,
       'PEAK1d': PEAK1d,
       'HRL1d': HRL1d,
       'LW1d': LW1d,
       'LVS1d': LVS1d,
       'MAR1d': MAR1d}

diff_stock3 = pd.DataFrame(dic3).set_index(GOOG.index[1:])
In [123]:
diff_stock3.corr()
Out[123]:
GOOG1d FB1d JPM1d BAC1d CINF1d DFS1d DRE1d PEAK1d HRL1d LW1d LVS1d MAR1d
GOOG1d 1.000000 0.622545 0.401207 0.407539 0.336213 0.409778 0.409614 0.314902 0.105119 0.272933 0.410428 0.415559
FB1d 0.622545 1.000000 0.262858 0.268749 0.186599 0.294676 0.295006 0.243506 0.075703 0.191262 0.318553 0.270208
JPM1d 0.401207 0.262858 1.000000 0.910234 0.633412 0.766945 0.392430 0.427986 0.149557 0.467209 0.530752 0.554273
BAC1d 0.407539 0.268749 0.910234 1.000000 0.614743 0.762158 0.357351 0.364705 0.142291 0.446114 0.520625 0.550728
CINF1d 0.336213 0.186599 0.633412 0.614743 1.000000 0.524034 0.513596 0.469031 0.303622 0.426689 0.328106 0.412689
DFS1d 0.409778 0.294676 0.766945 0.762158 0.524034 1.000000 0.331441 0.403606 0.074661 0.476002 0.510200 0.590446
DRE1d 0.409614 0.295006 0.392430 0.357351 0.513596 0.331441 1.000000 0.692310 0.372041 0.336983 0.259185 0.277077
PEAK1d 0.314902 0.243506 0.427986 0.364705 0.469031 0.403606 0.692310 1.000000 0.244895 0.458210 0.329508 0.366395
HRL1d 0.105119 0.075703 0.149557 0.142291 0.303622 0.074661 0.372041 0.244895 1.000000 0.166792 0.051794 0.029817
LW1d 0.272933 0.191262 0.467209 0.446114 0.426689 0.476002 0.336983 0.458210 0.166792 1.000000 0.387316 0.430735
LVS1d 0.410428 0.318553 0.530752 0.520625 0.328106 0.510200 0.259185 0.329508 0.051794 0.387316 1.000000 0.606739
MAR1d 0.415559 0.270208 0.554273 0.550728 0.412689 0.590446 0.277077 0.366395 0.029817 0.430735 0.606739 1.000000
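
To compare the two groups quantitatively, one simple summary is the average pairwise correlation of the first differences; a minimal sketch:

def mean_offdiag(corr):
    # Average of the off-diagonal entries of a square correlation matrix
    n = corr.shape[0]
    return (corr.values.sum() - n) / (n * (n - 1))

print('China-exposed group:', mean_offdiag(diff_stock2.corr()))
print('Random S&P 500 group:', mean_offdiag(diff_stock3.corr()))

A noticeably higher average in the China-exposed group would be consistent with stronger common-factor exposure among those companies.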
In [124]:
# Use the ADF test to check whether the spread between two first-difference stock price series is stationary (i.e., whether they are cointegrated)
from statsmodels.tsa.stattools import adfuller
adf1 = adfuller(GOOG1d - HRL1d)
print('The p-value for the ADF test on the spread between GOOG and HRL first-difference stock data is ', adf1[1])
The p-value for the ADF test on the spread between GOOG and HRL first-difference stock data is  7.498268890224783e-14

***The correlation coefficient between GOOG1d and HRL1d is the lowest in the matrix. However, the ADF test on their spread still shows that they are cointegrated: the p-value is far below 0.05, so we can reject the null hypothesis with more than 99% confidence.***
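
As a cross-check on the spread-based approach, statsmodels also provides an Engle-Granger cointegration test, coint, which is conventionally applied to the price levels rather than their differences; a minimal sketch:

from statsmodels.tsa.stattools import coint

# Engle-Granger test on the price levels of the least-correlated pair
stat, pvalue, crit = coint(GOOG.adjustedclose, HRL.adjustedclose)
print('The p-value for the Engle-Granger cointegration test on GOOG and HRL is', pvalue)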

Overall, the stock price time-series analysis suggests that the first-difference stock prices of most of the companies are cointegrated, which indicates that they are all affected by some common factor. Combined with the stock price trend plots in the previous section, this provides evidence that the U.S.-China trade war affected most of the companies in general.

The nine analysis examples above illustrate how the database can be used for further exploration of the U.S.-China trade war.

References¶

  • Kapadia, Reshma. November 15, 2021. U.S. Companies Face New Risks in a Changing China. What That Means for the Stocks. Barron's. https://www.barrons.com/articles/us-china-business-risks-stocks-51636707602. (Accessed Feb. 25, 2022)

  • Shen, Guobing, Peijie Wang, and Yuanhan Xu. 2021. Trade Destruction and Deflection Effects of US-China Trade Frictions on China’s Tariff-Targeted Products. The World Economy 44: 2076–2106. https://doi.org/10.1111/twec.13067.

  • Wang, Dong. June 16, 2013. U.S.-China Trade, 1971-2012: Insights into the U.S.-China Relationship. The Asia-Pacific Journal. Volume 11, Issue 24, Number 4. Article ID 3958.

  • Wikipedia Foundation. November 10, 2021. China–United States Trade War. Wikipedia. https://en.wikipedia.org/wiki/China–United_States_trade_war. (Retrieved March 17, 2022)

Data Sources¶

  • Bown, Chad P. 2021. “The US–China Trade War and Phase One Agreement.” Journal of Policy Modeling 43(4): 805–43.

  • Bown, Chad P. April 22, 2022. US-China Trade War Tariffs: An Up-to-Date Chart. https://www.piie.com/research/piie-charts/us-china-trade-war-tariffs-date-chart. (Accessed May 5, 2022)

  • DATA Hub. 2021. S&P 500 Companies with Financial Information. https://datahub.io/core/s-and-p-500-companies#readme. (Accessed Mar. 9, 2022)

  • Fortune 500 Company Revenue Data. Fortune. https://fortune.com/fortune500/2021/search/. (Accessed Mar. 18, 2022)

  • Merlos. 2021. SP 500 GICS classification. https://www.kaggle.com/code/merlos/sp-500-gics-classification/data?select=sp500-with-gics.csv. (Accessed May 5, 2022)

  • Yahoo! Finance. 2022. Yahoo stock price data. http://finance.yahoo.com/q/pr?s=MSFT. (Accessed May 3, 2022)

  • van Doorn, Philip. 2018. Apple, Nike and 18 Other U.S. Companies Have $158 Billion at Stake in China Trade War. (Tables of the top 20 U.S. companies in the S&P 500 with the highest level of sales in China and the top 20 with the highest share of sales in China are included.) https://www.marketwatch.com/story/trade-war-watch-these-are-the-us-companies-with-the-most-at-stake-in-china-2018-03-29. (Accessed Mar. 9, 2022)

  • U.S. Census Bureau. 2022. U.S. International Trade Data – Trade in Goods with China. https://www.census.gov/foreign-trade/balance/c5700.html. (Accessed Feb. 24, 2022)

  • United States International Trade Commission (USITC). Trade Data – General Imports, Total Exports, Trade Balance. https://dataweb.usitc.gov. (Accessed Mar. 9, 2022)

  • World Bank. World Development Indicators. https://databank.worldbank.org/reports.aspx?dsid=2&series=NY.GDP.MKTP.CD. (Accessed Apr. 26, 2022)